/*
	Estimates skill prices for each country and college graduate quality for each
	college, and then relates college graduate quality to development and notable
	accomplishments including entrepreneurship and top management, while also estimating
	the degree to which migrants are selected compared with non-migrants.
	Output:	Table 4 (vertically)
			Table 5
			Table 6
			Table 7
			Table 8
			Table A1
			Table A3
			Table A7
			Figure 2a, 2b
			Figure 3
			Figure 5a, 5b
			Figure 6
			Figure 7a, 7b
			Figure B1
			Figure B2
			Figure B3
			Figure B4
*/
 
local seed "C:\Users\jsock\Dropbox\Research\GD\International"

local dataPath "`seed'/Data"
local inputPath "`seed'/InputData"
local figurePath "`seed'/Replication/Figures"
local tablePath "`seed'/Replication/Tables"
local estimatePath "`seed'/Replication/Estimates"
local tempPath "`seed'/Replication/TempData"

********************************************************
* Read in country gdp to get universitycountry gdp
********************************************************

preserve

	clear
	
	insheet using "`inputPath'/Exchange_rates_2022.csv" , comma
	
	keep if year >= 2010 
	keep if year <= 2021 
	
	bys country_glassdoor: egen avg_gdppw = mean(gdppw)
	generate log_gdppw_uc = ln(avg_gdppw)
	
	bys country_glassdoor: keep if _n == 1

	keep country_glassdoor iso log_gdppw_uc	
	keep if log_gdppw_uc != .
	sort country_glassdoor
	
	save "`tempPath'/Country_gdppw.dta" , replace
	
restore 

********************************************************
* Read in nobel prizes by college
********************************************************

preserve

	clear
	
	insheet using "`inputPath'/nobel_prizes.csv" , comma

	rename university school 
	rename iso university_country_iso
	rename prizes nobel_prizes

	sort university_country_iso school 
	
	replace school = "The University of Texas at Austin" if school == "University of Texas, Austin"
	replace school = "University of California-Berkeley" if school == "University of California, Berkeley"
	replace school = "University of California-Los Angeles" if school == "University of California, Los Angeles"
	replace school = "University of North Carolina at Chapel Hill" if school == "University of North Carolina, Chapel Hill"
	replace school = "University of Minnesota-Twin Cities" if school == "University of Minnesota, Twin Cities"
	replace school = "University of Illinois at Urbana-Champaign" if school == "University of Illinois, Urbana-Champaign"
	replace school = "Technion Israel Institute of Technology" if school == "Technion (Israel Institute of Technology)"
	replace school = "Columbia University in the City of New York" if school == "Columbia University"
	replace school = "Brigham Young University-Provo" if school == "Brigham Young University"	
	replace school = "Georgia Institute of Technology-Main Campus" if school == "Georgia Institute of Technology"
	replace school = "University of Washington-Seattle Campus" if school == "University of Washington"
	replace school = "The University of Adelaide" if school == "University of Adelaide"
	replace school = "The University of Melbourne" if school == "University of Melbourne"
	replace school = "The University of Queensland" if school == "University of Queensland"
	replace school = "The University of Western Australia" if school == "University of Western Australia"
	replace school = "Vrije Universiteit Brussel" if school == "Free University of Brussels"
	replace school = "The University of British Columbia" if school == "University of British Columbia"
	replace school = "École Polytechnique Fédérale de Lausanne" if school == "Ecole Polytechnique Federale de Lausanne"
	replace school = "Université de Genève" if school == "University of Geneva"
	replace school = "Peking University" if school == "Peking University Health Science Center"
	replace school = "Goethe University Frankfurt" if school == "Goethe University (Frankfurt)"
	replace school = "Ruprecht-Karls-Universität Heidelberg" if school == "Heidelberg University"
	replace school = "Technische Universität Darmstadt" if school == "Technical University of Darmstadt (TU Darmstadt)"
	replace school = "Universität Stuttgart" if school == "Technical University of Stuttgart"
	replace school = "University of Düsseldorf" if school == "University of Dusseldorf"
	replace school = "University of Göttingen" if school == "University of Gottingen"
	replace school = "Ludwig-Maximilians-Universität München" if school == "University of Munich"
	replace school = "University of Tübingen" if school == "University of Tubingen"
	replace school = "École Normale Supérieure" if school == "Ecole Normale Superieure"
	replace school = "École Polytechnique" if school == "Ecole Polytechnique"
	replace school = "Université de Poitiers" if school == "University of Poitiers"
	replace school = "Université de Strasbourg" if school == "University of Strasbourg"
	replace school = "King's College London" if school == "Kingâs College London"
	replace school = "The London School of Economics and Political Science" if school == "London School of Economics"
	replace school = "Queen Mary University of London" if school == "Queen Mary University of London (QMUL)"
	replace school = "University College London" if school == "University College, London"
	replace school = "The University of Edinburgh" if school == "University of Edinburgh"
	replace school = "The University of Nottingham" if school == "University of Nottingham"
	replace school = "The University of Sheffield" if school == "University of Sheffield"
	replace school = "Eötvös Loránd University" if school == "Eotvos Lorand University (University of Budapest)"
	replace school = "Trinity College Dublin" if school == "Trinity College, Dublin"
	replace school = "National Autonomous University of Mexico" if school == "National Autonomous University of Mexico (UNAM)"
	replace school = "Moscow Institute of Physics and Technology" if school == "Moscow Institute of Physics and Technology (MIPT)"
	replace school = "Lunds universitet" if school == "Lund University"
	replace school = "Istanbul Üniversitesi" if school == "Istanbul University"
	replace school = "Augsburg University" if school == "Augsburg College"
	replace school = "California Institute of Technology" if school == "Caltech"
	replace school = "CUNY City College" if school == "City College of New York"
	replace school = "Cooper Union for the Advancement of Science and Art" if school == "Cooper Union"
	replace school = "Massachusetts Institute of Technology" if school == "MIT"
	replace school = "Pomona College" if school == "Pomona University"
	replace school = "University of California-Davis" if school == "University of California, Davis"
	replace school = "University of California-Santa Barbara" if school == "University of California, Santa Barbara"
	replace school = "University of California-Riverside" if school == "University of California, Riverside"
	replace school = "University of California-San Diego" if school == "University of California, San Diego"
	replace school = "University of Maryland-College Park" if school == "University of Maryland"
	replace school = "University of Michigan-Ann Arbor" if school == "University of Michigan"
	replace school = "University of Minnesota-Duluth" if school == "University of Minnesota, Duluth"
	replace school = "University of Nebraska-Lincoln" if school == "University of Nebraska, Lincoln"
	replace school = "Virginia Polytechnic Institute and State University" if school == "Virginia Tech"
	replace school = "Washington University in St Louis" if school == "Washington University in St. Louis"
	replace school = "Saint Petersburg State Electrotechnical University" if school == "St. Petersburg Electrotechnical University (LETI)"
	replace school = "The Maharaja Sayajirao University of Baroda" if school == "Maharaja Sayajirao University of Baroda"
	replace school = "Grenoble Alpes University" if school == "University of Grenoble"
	replace school = "Friedrich Schiller University Jena" if school == "University of Jena"
	replace school = "The University of Tokyo" if school == "University of Tokyo"
	replace school = "The University of Tokushima" if school == "University of Tokushima"
	
	replace university_country_iso = "CAN" if school == "University of Alberta"

	sort university_country_iso school
	
	save "`tempPath'/University_nobel_prizes.dta" , replace
	
	bys university_country_iso: egen country_nobel_prizes = sum(nobel_prizes)
	
	bys university_country_iso: keep if _n == 1

	keep university_country_iso country_nobel_prizes
	
	save "`tempPath'/Country_nobel_prizes.dta" , replace

restore

********************************************************
* Read in CEOs of S&P500 in 2005 by college
********************************************************

preserve

	clear
	
	insheet using "`inputPath'/sp500_uni.csv" , comma

	rename iso university_country_iso
	rename university school 
	rename number university_ceos

	drop if university_country_iso == ""
		
	* Rename some schools to line up with the names in Glassdoor
	replace school = "University of Pennsylvania" if school == "Wharton School of the University of Penn."
	replace school = "University of Wisconsin-Madison" if school == "University of Wisconsin"
	replace school = "University of California-Los Angeles" if school == "University of California at Los Angeles"
	replace school = "University of California-Berkeley" if school == "University of California at Berkley"
	replace school = "Pennsylvania State University-Main Campus" if school == "Pennsylvania State University"
	replace school = "Drake University" if school == "Drake"
	replace school = "The University of Texas at Austin" if school == "University of Texas"
	replace school = "University of North Carolina at Chapel Hill" if school == "University of North Carolina"
	replace school = "University of Missouri-Columbia" if school == "University of Missouri"
	replace school = "University of Washington-Seattle Campus" if school == "University of Washington"
	replace school = "Ohio State University-Main Campus" if school == "Ohio State University"
	replace school = "University of Massachusetts-Amherst" if school == "University of Massachusetts"
	replace school = "University of Massachusetts-Amherst" if school == "Isenberg School of Management"
	replace school = "University of Colorado Boulder" if school == "University of Colorado"
	replace school = "Rutgers University-New Brunswick" if school == "Rutgers University"
	replace school = "Louisiana State University and Agricultural & Mechanical College" if school == "Louisiana State University"
	replace school = "CUNY City College" if school == "City College of New York"
	replace school = "The University of Tennessee-Knoxville" if school == "University of Tennessee"
	replace school = "University of Illinois at Urbana-Champaign" if school == "University of Illinois"
	replace school = "University of Cincinnati-Main Campus" if school == "University of Cincinnati"
	replace school = "The University of Alabama" if school == "University of Alabama"
	replace school = "Purdue University-Main Campus" if school == "Purdue University"
	replace school = "Oklahoma State University-Main Campus" if school == "Oklahoma State University"
	replace school = "Miami University-Oxford" if school == "Miami University of Ohio"
	replace school = "Loyola University Chicago" if school == "Loyola University"
	replace school = "Georgia Institute of Technology-Main Campus" if school == "Georgia Institute of Technology"
	replace school = "Arizona State University-Tempe" if school == "Arizona State"
	replace school = "Brigham Young University-Provo" if school == "Brigham Young University"
	replace school = "California State Polytechnic University-Pomona" if school == "California State Polytechnic University"
	replace school = "Columbia University in the City of New York" if school == "Columbia College"
	replace school = "Indiana University-Bloomington" if school == "Indiana University"
	replace school = "LIU-University Center Campus" if school == "Long Island University"
	replace school = "North Carolina State University at Raleigh" if school == "North Carolina State"
	replace school = "Ohio University-Main Campus" if school == "Ohio University"
	replace school = "University of Maryland-College Park" if school == "University of Maryland"
	replace school = "University of Michigan-Ann Arbor" if school == "University of Michigan"
	replace school = "University of Minnesota-Twin Cities" if school == "University of Minnesota"
	replace school = "University of Nebraska-Lincoln" if school == "University of Nebraska"
	replace school = "University of Pittsburgh-Pittsburgh Campus" if school == "University of Pittsburgh"
	replace school = "The University of British Columbia" if school == "University of British Columbia"
	replace school = "Imperial College London" if school == "Imperial College"
	replace school = "University of Virginia-Main Campus" if school == "University of Virginia"
	replace school = "University of South Florida-St Petersburg" if school == "University of South Florida"
	replace school = "University of Oklahoma-Norman Campus" if school == "University of Oklahoma"
	replace school = "University of New Hampshire-Main Campus" if school == "University of New Hampshire"
	replace school = "University of Hawaii at Manoa" if school == "University of Hawaii"
	replace school = "University of Akron Main Campus" if school == "University of Akron"
	replace school = "United States Air Force Academy" if school == "US Air Force Academy"
	replace school = "Tulane University of Louisiana" if school == "Tulane University"
	replace school = "Texas Tech University" if school == "Texas Technological University"
	replace school = "Texas State University" if school == "Texas State University-San Marcos"
	replace school = "Saint Joseph's University" if school == "St. Joseph's University"
	replace school = "Slippery Rock University of Pennsylvania" if school == "Slippery Rock University"
	replace school = "Providence College" if school == "Providence College-Rhode Island"
	replace school = "North Dakota State University-Main Campus" if school == "North Dakota State University"
	replace school = "Florida Agricultural and Mechanical University" if school == "Florida A&M University"
	replace school = "University College Dublin" if school == "University College-Dublin"
	replace school = "Technion Israel Institute of Technology" if school == "Technion-Israel Institute of Technology"
	replace school = "Erasmus Universiteit Rotterdam" if school == "Eramus Universiteit Rotterdam"
	replace school = "Universiteit van Amsterdam" if school == "University of Amsterdam"
	replace school = "St John's University-New York" if school == "St. John's University"
	replace school = "Santa Clara University" if school == "University of Santa Clara"
	replace school = "Point Park University" if school == "Point Park College"
	
	replace school = "The University of Queensland" if school == "University of Queensland"
	replace school = "Universidade Presbiteriana Mackenzie" if school == "MacKenzie University"
	replace school = "University of Lausanne" if school == "Lausanne University"
	replace school = "HEC Paris" if school == "Ecole des Hautes Etudes Commerciales, Paris,France"
	replace school = "Banaras Hindu University" if school == "Banaras University (India)"
	replace school = "The Maharaja Sayajirao University of Baroda" if school == "M.S. University (India)"
	replace school = "American University of Beirut" if school == "American Univ. Beirut"
	replace school = "Radboud University" if school == "University of Nijmegen"
	replace school = "Christian Brothers University" if school == "Christian Brothers College"
	replace school = "Loyola University Maryland" if school == "Loyola College of Baltimore"	
	replace school = "Pittsburg State University" if school == "Pittsburgh State University"	
	replace school = "Saint Peter's University" if school == "St. Peter's College"	
	replace school = "University of Detroit Mercy" if school == "University of Detroit"	
	replace school = "University of Wisconsin-Milwaukee" if school == "University of Milwaukee"	
	replace school = "Missouri University of Science and Technology" if school == "University of Missouri-Rolla"	
	replace school = "The University of Montana" if school == "University of Montana"	

	replace school = "Western University" if school == "University of Western Ontario"
	replace university_country_iso = "CAN" if school == "Western University"
	
	collapse (sum) university_ceos , by(school university_country_iso)
	sort university_country_iso university_ceos school
	
	save "`tempPath'/University_ceos.dta" , replace
	
	bys university_country_iso: egen country_ceos = sum(university_ceos)
	
	bys university_country_iso: keep if _n == 1

	keep university_country_iso country_ceos
	
	save "`tempPath'/Country_ceos.dta" , replace
	
restore

********************************************************
* Read in CEOs of S&P500 in 2021 by college
********************************************************

preserve

	clear
	
	insheet using "`inputPath'/SP500_2021.csv" , comma

	rename university school 
	rename iso university_country_iso
	rename ceos university_ceos
	
	drop if university_country_iso == ""
			
	replace university_country_iso = "CHN" if school == "Tsinghua University"
	replace university_country_iso = "CAN" if school == "Simon Fraser University"
	
	* Rename some schools to line up with the names in Glassdoor
	replace school = "Pennsylvania State University-Main Campus" if school == "Pennsylvania State University"
	replace school = "University of Washington-Seattle Campus" if school == "University of Washington"
	replace school = "Rutgers University-New Brunswick" if school == "Rutgers University"
	replace school = "Louisiana State University and Agricultural & Mechanical College" if school == "Louisiana State University"
	replace school = "The University of Alabama" if school == "University of Alabama"
	replace school = "Purdue University-Main Campus" if school == "Purdue University"
	replace school = "Oklahoma State University-Main Campus" if school == "Oklahoma State University"
	replace school = "Indiana University-Bloomington" if school == "Indiana University"
	replace school = "Ohio University-Main Campus" if school == "Ohio University"
	replace school = "University of Michigan-Ann Arbor" if school == "University of Michigan"
	replace school = "University of Pittsburgh-Pittsburgh Campus" if school == "University of Pittsburgh"
	replace school = "The University of British Columbia" if school == "University of British Columbia"
	replace school = "University of Virginia-Main Campus" if school == "University of Virginia"
	replace school = "University of Oklahoma-Norman Campus" if school == "University of Oklahoma"
	replace school = "University of New Hampshire-Main Campus" if school == "University of New Hampshire"
	replace school = "North Dakota State University-Main Campus" if school == "North Dakota State University"
	replace school = "Christian Brothers University" if school == "Christian Brothers College"
	replace school = "Instituto Tecnológico de Buenos Aires" if school == "Instituto Tecnologico de Buenos Aires"
	replace school = "Universidad Tecnológica Nacional" if school == "National Technological University (Argentina)"
	replace school = "The University of Melbourne" if school == "University of Melbourne"
	replace school = "The University of New South Wales" if school == "University of New South Wales"
	replace school = "Wirtschaftsuniversität Wien" if school == "University of Economics in Vienna"
	replace school = "Université Catholique de Louvain" if school == "Universite Catholique de Louvain"
	replace school = "Fundação Getúlio Vargas" if school == "Fundacao Getulia Vargas (EAESP, Sao Paulo)"
	replace school = "Universidade Estadual de Campinas" if school == "Universidade Estadual de Campinas (UNICAMP, Sao Paulo)"
	replace school = "Federal University of Minas Gerais" if school == "Universidade Federal de Minais Gerais"
	replace school = "Royal Military College of Canada" if school == "Royal Military College"
	replace school = "École Polytechnique Fédérale de Lausanne" if school == "Ecole Polytechnique Federale de Lausanne"
	replace school = "Hochschule für Wirtschaft und Recht Berlin" if school == "Berlin School of Economics and Law"
	replace school = "Universität Passau" if school == "University of Passau"
	replace school = "Universidad Politécnica de València" if school == "Polytechnic University of Valencia"
	replace school = "Université de Bordeaux" if school == "ENSEIRB-MATMECA (University of Bordeaux)"
	replace school = "Arts et Métiers ParisTech" if school == "Ecole Nationale Superieure D’Arts Et Metiers"
	replace school = "Université de Bordeaux" if school == "University Victor Segalen, Bourdeaux II"
	replace school = "The University of Nottingham" if school == "University of Nottingham"
	replace school = "Ulster University" if school == "University of Ulster"
	replace school = "Indian Institute of Technology Bombay" if school == "Indian Institute of Technology, Bombay"
	replace school = "Indian Institute of Technology Kanpur" if school == "Indian Institute of Technology, Kanpur"
	replace school = "Indian Institute of Technology Kharagpur" if school == "Indian Institute of Technology, Kharagpur"
	replace school = "Indian Institute of Technology Madras" if school == "Indian Institute of Technology, Madras"
	replace school = "Sapienza Università di Roma" if school == "University of Rome"
	replace school = "Uppsala Universitet" if school == "Uppsala University"
	replace school = "Bogazici University" if school == "Bosporus University"
	replace school = "University of South Africa" if school == "University of South Afria"
	replace school = "Arizona State University-Tempe" if school == "Arizona State University"
	replace school = "Brigham Young University-Provo" if school == "Brigham Young University"
	replace school = "California State University-Long Beach" if school == "California State University, Long Beach"
	replace school = "California State University-Northridge" if school == "California State University, Northridge"
	replace school = "California State University-San Bernardino" if school == "California State University, San Bernadino"
	replace school = "California University of Pennsylvania" if school == "California University, Pennsylvania"
	replace school = "California Institute of Technology" if school == "Caltech"
	replace school = "Carnegie Mellon University" if school == "Carnegie-Mellon University"
	replace school = "Claremont McKenna College" if school == "Claremont McKenna"
	replace school = "College of Saint Benedict and Saint John's University" if school == "College of St. Benedict"
	replace school = "College of William and Mary" if school == "College of William & Mary"
	replace school = "Columbia University in the City of New York" if school == "Columbia University"
	replace school = "Franklin and Marshall College" if school == "Franklin & Marshall College"
	replace school = "Georgia Institute of Technology-Main Campus" if school == "Georgia Tech"
	replace school = "Hobart William Smith Colleges" if school == "Hobart College"
	replace school = "Massachusetts Institute of Technology" if school == "MIT"
	replace school = "Miami University-Oxford" if school == "Miami University"
	replace school = "Minnesota State University Moorhead" if school == "Minnesota State University, Moorhead"
	replace school = "Missouri University of Science and Technology" if school == "Missouri University of Science & Technology"
	replace school = "New Mexico State University-Main Campus" if school == "New Mexico State University"
	replace school = "North Carolina State University at Raleigh" if school == "North Carolina State University"
	replace school = "Ohio State University-Main Campus" if school == "Ohio State University, Columbus"
	replace school = "Otterbein University" if school == "Otterbein College"
	replace school = "Salisbury University" if school == "Salisbury University (Maryland)"
	replace school = "St Bonaventure University" if school == "St. Bonaventure University"
	replace school = "Saint John Fisher College" if school == "St. John Fisher College"
	replace school = "Saint Johns University" if school == "St. John’s University"
	replace school = "Saint Josephs College" if school == "St. Joseph’s College"
	replace school = "SUNY Polytechnic Institute" if school == "State University of New York Institute of Technology at Utica"
	replace school = "Texas A & M University-College Station" if school == "Texas A&M University"
	replace school = "Citadel Military College of South Carolina" if school == "The Citadel"
	replace school = "United States Military Academy" if school == "United States Military Academy (West Point)"
	replace school = "University of Arkansas" if school == "University of Arkansas, Fayetteville"
	replace school = "University of California-Berkeley" if school == "University of California, Berkeley"
	replace school = "University of California-Davis" if school == "University of California, Davis"
	replace school = "University of California-Irvine" if school == "University of California, Irvine"
	replace school = "University of California-Los Angeles" if school == "University of California, Los Angeles"
	replace school = "University of California-Santa Cruz" if school == "University of California, Santa Cruz"
	replace school = "University of Colorado Boulder" if school == "University of Colorado, Boulder"
	replace school = "University of Illinois at Chicago" if school == "University of Illinois, Chicago"
	replace school = "University of Illinois at Urbana-Champaign" if school == "University of Illinois, Urbana-Champaign"
	replace school = "University of Massachusetts-Amherst" if school == "University of Massachusetts, Amherst"
	replace school = "University of Michigan-Dearborn" if school == "University of Michigan, Dearborn"
	replace school = "University of Minnesota-Twin Cities" if school == "University of Minnesota, Twin Cities"
	replace school = "University of Missouri-Columbia" if school == "University of Missouri, Columbia"
	replace school = "University of Missouri-St Louis" if school == "University of Missouri, St. Louis"
	replace school = "University of Nebraska-Lincoln" if school == "University of Nebraska, Lincoln"
	replace school = "University of Nevada-Las Vegas" if school == "University of Nevada, Las Vegas (UNLV)"
	replace school = "University of North Carolina at Chapel Hill" if school == "University of North Carolina, Chapel Hill"
	replace school = "University of St Thomas" if school == "University of St. Thomas (Minnesota)"
	replace school = "The University of Tennessee-Chattanooga" if school == "University of Tennessee, Chattanooga"
	replace school = "The University of Tennessee-Martin" if school == "University of Tennessee, Martin"
	replace school = "The University of Texas at Austin" if school == "University of Texas, Austin"
	replace school = "University of Wisconsin-Madison" if school == "University of Wisconsin, Madison"
	replace school = "University of Wisconsin-Milwaukee" if school == "University of Wisconsin, Milwaukee"
	replace school = "University of Wisconsin-River Falls" if school == "University of Wisconsin, River Falls"
	replace school = "University of Wisconsin-Superior" if school == "University of Wisconsin, Superior"
	replace school = "University of Wisconsin-Whitewater" if school == "University of Wisconsin, Whitewater"
	replace school = "Virginia Polytechnic Institute and State University" if school == "Virginia Tech"
	
	collapse (sum) university_ceos , by(school university_country_iso)
	sort university_country_iso university_ceos school
	
	save "`tempPath'/University_ceos_2021.dta" , replace
	
	bys university_country_iso: gen country_ceos_2021 = _N
	
	bys university_country_iso: keep if _n == 1

	keep university_country_iso country_ceos_2021
	
	save "`tempPath'/Country_ceos_2021.dta" , replace

restore		

********************************************************
* Read in patents data by country of study
********************************************************

preserve

	clear
	
	insheet using "`inputPath'/patents.csv" , comma

	rename iso university_country_iso

	drop if university_country_iso == ""

	bys university_country_iso: keep if _n == 1

	keep university_country_iso patents patentspc
	
	save "`tempPath'/Country_patents.dta" , replace
	
restore

********************************************************
* Read in country gdp to get universitycountry gdp
********************************************************

clear 
set more off
set matsize 10000
set scheme s1mono

* Set path and load data 
insheet using "`dataPath'/Salaries_international_dataset_main.csv", comma
drop v1

drop if jobtitle == ""

drop metro shortname  
drop city basecurrency country_iso   
drop sectorname iscurrentjobflag 

*--------------------------------
* Thresholds used for sample selection
*--------------------------------
.
scalar country_premia_thresh = 25

scalar selection_thresh = 25

scalar school_thresh = 25

scalar school_major_thresh = 25

scalar pctThresh = 0.05

*--------------------------------
* Exclude users that leave more than 10 reviews
*--------------------------------

sort fk_userid yearofsalary dateval salid

by fk_userid : gen obsNum = _n

by fk_userid : gen userReviews = _N

drop if userReviews > 10

drop userReviews

*--------------------------------
* Generate additional variables
*--------------------------------

generate exp = yearsofrelevantexpnumber
generate exp_sqrd = exp ^ 2
drop yearsofrelevantexpnumber

generate logbase = ln(basesalary * ppp_xrat)

generate log_gdppw = ln(gdppw)

drop if ppp_xrat == .

*--------------------------------
* Exclude outliers in base pay
*--------------------------------

scalar scalingThresh = 10

generate realbase = basesalary * ppp_xrat
generate outside_thresh =  (realbase < (1/scalingThresh) * gdppw) | (realbase > scalingThresh * gdppw) 
drop realbase

*------------------------------------------
* Create Ranking Bins
*------------------------------------------

generate rankBin = ""
replace rankBin = "A_01_20" if inrange(world_rank,1,20)
replace rankBin = "B_21_50" if inrange(world_rank,21,50)
replace rankBin = "C_51_100" if inrange(world_rank,51,100)
replace rankBin = "D_101_250" if inrange(world_rank,101,250)
replace rankBin = "E_251_500" if inrange(world_rank,251,500)
replace rankBin = "F_501_1000" if inrange(world_rank,501,1000)
replace rankBin = "G_1001_2000" if inrange(world_rank,1001,2000)
replace rankBin = "H_UNRANKED" if world_rank == . & universitycountry != ""

generate national_rank_pct = national_rank / numberuniversities

*------------------------------------------
* Create Major of Study Bins
*------------------------------------------

generate majorStem = ""
replace majorStem = "Stem" if grpmajor == "Biological Sciences" | grpmajor == "Engineering" | grpmajor == "Physical Sciences" | grpmajor == "Technology"   
replace majorStem = "NonStem" if grpmajor == "Business" | grpmajor == "Social Sciences" | grpmajor == "Arts and Humanities" | grpmajor == "Communication" | grpmajor == "Education" | grpmajor == "Health Service" | grpmajor == "Social Service" 

*-------------------------------- 
* Drop Singapore Polytechnics which aren't considered by WHED
*--------------------------------

drop if school == "Nanyang Polytechnic"
drop if school == "Ngee Ann Polytechnic"
drop if school == "Republic Polytechnic"
drop if school == "Singapore Polytechnic"
drop if school == "Temasek Polytechnic"

*--------------------------------
* Generate indicators for wage in same country as university or not
*--------------------------------

generate home_country = countryname == universitycountry & universitycountry != ""

generate foreign_country = countryname != universitycountry & universitycountry != ""

*--------------------------------
* Generate indicators for valid in education analysis
*--------------------------------

* For first degree

generate hasDegree = degree != "UNMATCHED" & degree != "missing" & degree != ""  & degree != "HIGHSCHOOL"

generate uniDegree = degree == "BACHELORS"  

generate hasSchool = school != ""

* For second degree

generate hasDegree_2 = degree_2 != "UNMATCHED" & degree_2 != "missing" & degree_2 != ""  & degree != "HIGHSCHOOL"

generate uniDegree_2 = degree_2 != "ASSOCIATES" & degree_2 != "DIPLOMA"  & degree_2 != "HIGHSCHOOL" & degree_2 != "BACHELORS"

generate hasSchool_2 = school_2 != ""

replace degree_2 = "NONE" if degree_2 == ""

*--------------------------------
* Add work country name
*--------------------------------

generate country_glassdoor = countryname

merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
drop _merge

rename iso work_country_iso
rename log_gdppw_uc log_gdppw_work_country 

drop country_glassdoor 

*--------------------------------
* Add country of study name
*--------------------------------

generate country_glassdoor = universitycountry

merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
drop _merge

rename iso university_country_iso
rename log_gdppw_uc log_gdppw_university_country 

drop country_glassdoor 

*--------------------------------
* Keep baseline sample 
*--------------------------------

drop if employertypecode == "SELF_EMPLOYED" 

drop if outside_thresh

generate valid_educ = uniDegree & hasDegree & hasSchool & universitycountry != ""
keep if valid_educ 

* Determine share with usable major information
generate hasMajor = majorStem != ""
summarize hasMajor , detail
scalar share_with_major = r(mean)

********************************************************
* SAMPLE SIZE FOR COUNTRY PREMIA Z_C (MOVERS)
********************************************************

*--------------------------------
* Determine top destinations
*--------------------------------

sort fk_userid yearofsalary dateval salid

by fk_userid: generate destination = countryname[_n+1]

generate migrant = destination != countryname & destination != ""

sort destination 
by destination : egen destinationMigrants = sum(migrant)
by destination : replace destinationMigrants = . if destination == ""
by destination : replace destinationMigrants = . if _n > 1

replace destinationMigrants = destinationMigrants * -1

sort destinationMigrants 

* Determine top destination (1.0% of migrants)

egen totalMigrants = sum(destinationMigrants * -1)
	
generate migrant_share = -1 * destinationMigrants / totalMigrants
	
generate top_destination = migrant_share >= 0.01 & migrant_share != .

bys destination : egen topDestination = max(top_destination)

* Output migrant totals / shares

preserve

	keep if destinationMigrants != .
	
	bys destination: keep if _n == 1
	
	sort destinationMigrants

	replace destinationMigrants = destinationMigrants * -1 
	
	keep destination destinationMigrants totalMigrants migrant_share
	
	outsheet using "`estimatePath'\Migrants_to_each_destination.csv" , comma replace
	
restore

drop destinationMigrants top_destination totalMigrants 

*--------------------------------
* Determine # of migrants to top ten destination for each origin
*--------------------------------

bys countryname : egen migrantsOverall = sum(migrant)  

bys countryname : egen migrantsTop = sum(migrant * topDestination)  

generate valid_country_premia = migrantsTop >= country_premia_thresh & migrantsTop != .

********************************************************
* STEP 0: SETUP FOR SKILL LOSS 
********************************************************

sort fk_userid yearofsalary dateval salid
 
by fk_userid: gen prev_country = countryname[_n-1]

*--------------------------------
* Determine if migrated
*--------------------------------

generate migrated = countryname != prev_country & prev_country != ""

sort fk_userid yearofsalary dateval salid

by fk_userid: gen already_migrated = sum(migrated)

*--------------------------------
* Generate variable capturing delta_gdppw for migrants - between countries of work
*--------------------------------

sort fk_userid yearofsalary dateval salid
by fk_userid: gen prev_gdp = log_gdppw_work_country[_n-1]

* Absolute value

generate delta_log_gdppw = abs(log_gdppw_work_country - prev_gdp)

generate migrant_delta_gdppw_work = 0
by fk_userid: replace migrant_delta_gdppw_work = delta_log_gdppw if migrated == 1 
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-1] if migrated == 0 & migrated[_n-1] == 1
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-2] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 1
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-3] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 1
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-4] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 1
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-5] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 1
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-6] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 1
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-7] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 0 & migrated[_n-7] == 1
by fk_userid: replace migrant_delta_gdppw_work = migrant_delta_gdppw_work[_n-8] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 0 & migrated[_n-7] == 0 & migrated[_n-8] == 1

* Only positive

generate delta_log_gdppw_pos = log_gdppw_work_country - prev_gdp
replace delta_log_gdppw_pos = 0 if delta_log_gdppw_pos < 0

generate migrant_delta_gdppw_work_pos = 0
by fk_userid: replace migrant_delta_gdppw_work_pos = delta_log_gdppw_pos if migrated == 1 
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-1] if migrated == 0 & migrated[_n-1] == 1
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-2] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 1
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-3] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 1
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-4] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 1
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-5] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 1
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-6] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 1
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-7] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 0 & migrated[_n-7] == 1
by fk_userid: replace migrant_delta_gdppw_work_pos = migrant_delta_gdppw_work_pos[_n-8] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 0 & migrated[_n-7] == 0 & migrated[_n-8] == 1

drop delta_log_gdppw delta_log_gdppw_pos

*--------------------------------
* Generate coarse desination FE for migration 
*	Keep top destinations alone, group others by continent
*--------------------------------

	* Top destinations
	
	preserve

		keep destination topDestination
		rename destination countryname
		rename topDestination is_topDestination
		
		bys countryname : keep if _n == 1
		
		tempfile in_results
		save `in_results', emptyok
		
	restore

	merge m:1 countryname using `in_results'
	drop if _merge == 2
	drop _merge

	generate coarseLocation = ""
	replace coarseLocation = countryname if is_topDestination == 1
	drop is_topDestination

	* Continents
	
	preserve

		clear
		
		insheet using "`inputPath'\Country_continents.csv" , comma
		
		rename v2 countryname
		rename v7 continent
		keep countryname continent
		
		bys countryname : keep if _n == 1
		
		replace countryname = "Czech Republic" if countryname == "Czechia"
		replace countryname = "Iran" if countryname == "Iran (Islamic Republic of)"
		replace countryname = "Russia" if countryname == "Russian Federation"
		replace countryname = "Hong Kong" if countryname == "China, Hong Kong Special Administrative Region"
		replace countryname = "South Korea" if countryname == "Republic of Korea"
		replace countryname = "United Kingdom" if countryname == "United Kingdom of Great Britain and Northern Ireland"
		replace countryname = "United States" if countryname == "United States of America"
		replace countryname = "Vietnam" if countryname == "Viet Nam"
		
		tempfile in_results
		save `in_results', emptyok
		
	restore

	merge m:1 countryname using `in_results'
	drop if _merge == 2
	drop _merge

	replace coarseLocation = continent if coarseLocation == "" & valid_country_premia
	
	* Destination FE
	
	tab coarseLocation if valid_country_premia == 1, gen(c_)
	foreach my_var of varlist c_*{
		replace `my_var' = 0 if ~(migrated | already_migrated >= 1)
	}	
	
	label var c_1 "Destination: Other Africa"
	label var c_2 "Destination: Other Asia"
	label var c_3 "Destination: Australia"
	label var c_4 "Destination: Canada"
	label var c_5 "Destination: Other Europe"
	label var c_6 "Destination: France"
	label var c_7 "Destination: Germany"
	label var c_8 "Destination: India"
	label var c_9 "Destination: Ireland"
	label var c_10 "Destination: Netherlands"
	label var c_11 "Destination: Other North America"
	label var c_12 "Destination: Other Oceania"
	label var c_13 "Destination: Singapore"
	label var c_14 "Destination: Other South America"
	label var c_15 "Destination: Spain"
	label var c_16 "Destination: Sweden"
	label var c_17 "Destination: United Kingdom"
	label var c_18 "Destination: United States"

	* Roll forward origin locations 
	
	sort fk_userid yearofsalary dateval salid
 
	by fk_userid: gen origin = countryname[_n-1] if countryname[_n-1] != "" 
	by fk_userid: replace origin = origin[_n-1] if migrated == 0 & migrated[_n-1] == 1
	by fk_userid: replace origin = origin[_n-2] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 1
	by fk_userid: replace origin = origin[_n-3] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 1
	by fk_userid: replace origin = origin[_n-4] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 1
	by fk_userid: replace origin = origin[_n-5] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 1
	by fk_userid: replace origin = origin[_n-6] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 1
	by fk_userid: replace origin = origin[_n-7] if migrated == 0 & migrated[_n-1] == 0 & migrated[_n-2] == 0 & migrated[_n-3] == 0 & migrated[_n-4] == 0 & migrated[_n-5] == 0 & migrated[_n-6] == 0 & migrated[_n-7] == 1
	
	* Convert origins into coarse locations
	
	preserve
	
		keep countryname coarseLocation
		
		bys countryname : keep if _n == 1
	
		rename countryname origin
		rename coarseLocation coarseOrigin

		tempfile in_results
		save `in_results', emptyok
		
	restore

	merge m:1 origin using `in_results'
	drop _merge
	
*--------------------------------
* Merge in bilateral measures between origin and destination
*--------------------------------

	sort fk_userid yearofsalary dateval salid

	generate iso3_d = work_country_iso
	
	by fk_userid: generate iso3_o = work_country_iso[_n-1]

	by fk_userid: replace iso3_o = work_country_iso if work_country_iso[_n-1] == "" 

	preserve
		
		clear
		insheet using "`dataPath'/Gravity_origin_destination.csv" , comma
		drop v1
		
		tempfile tempfile
		save `tempfile', emptyok
		
	restore

	merge m:1 iso3_o iso3_d using `tempfile'
	drop if _merge == 2
	drop _merge	
	
	generate ln_dist = ln(dist)
	
********************************************************
* STEP 1: ESTIMATE COUNTRY FE Z_C
********************************************************

reghdfe logbase exp exp_sqrd c_* migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid fe_country=countryname yearofsalary ) vce(cluster countryname)

* Determine number of migrants in sample

preserve

	keep if e(sample)

	bys fk_userid countryname: gen new_worker_country = _n == 1
	
	bys fk_userid : egen worker_countries = sum(new_worker_country)
	
	keep if worker_countries > 1 

	bys fk_userid : keep if _n == 1
	
	sum yearofsalary
	
	scalar migrants_step_one = r(N)
	
restore

* Store estimates

bys countryname : egen obs_step1 =sum(e(sample))

bys countryname : egen z_c = max(fe_country)

generate hasCountryPremia = z_c != .

generate tau = 0
foreach my_var of varlist c_* {
	
	bys coarseLocation : egen fill_`my_var' = max(`my_var')
	replace tau = tau + `my_var' * _b["`my_var'"] 
	replace tau = tau + fill_`my_var' * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 
	drop fill_*
	
}
foreach my_var of varlist migrant_delta_gdppw_work {

	replace tau = tau + `my_var' * _b["`my_var'"]
	replace tau = tau + abs(log_gdppw_work_country - log_gdppw_university_country) * _b["`my_var'"] if `my_var' == 0 & universitycountry != countryname 

}

generate logbase_minus_z_c = logbase - z_c - tau

*--------------------------------
* OUTPUT Z_C AND MAKE SURE 48 COUNTRIES 
*--------------------------------

preserve

	keep if z_c != .
	
	keep countryname z_c obs_step1 log_gdppw_work_country
	
	order countryname z_c obs_step1 log_gdppw_work_country
	
	bys countryname : keep if _n == 1
	
	generate sort_z_c = -1 * z_c
	
	sort sort_z_c
	
	drop sort_z_c

	eststo clear
	
	eststo: reg z_c log_gdppw_work_country 
		
	* Correlation of q with baseline
	pwcorr z_c log_gdppw_work_country
	matrix corrMat = r(C)
	local corrVal : di %4.2f corrMat[1,2]
	estadd loc corr_gdppw "`corrVal'", replace 			

	* Store information for plotting
	local texFile = "`tablePath'/Elasticity_zc_gdppw.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(corr_gdppw N r2_a, 
						fmt(%3.2f %9.0g %3.2f) 
						labels("GDPPW correlation with z" 
							   "N" 
							   "Adjusted R$^2$")) 
		drop(_cons )
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \\
		)
		posthead(\\)
		prefoot(\\ \hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr

	drop log_gdppw_work_country

	outsheet using "`estimatePath'\Premium_z_c.csv" , comma replace

restore

********************************************************
* Output Regression Results for Skill Loss
********************************************************

	*------------------------------
	* Cluster by country of work
	*------------------------------

	bys countryname : gen newCountry = _n == 1

	tab coarseLocation if valid_country_premia == 1, gen(o_)
	foreach my_var of varlist o_*{
		replace `my_var' = 0 if ~(migrated | already_migrated >= 1)
	}	

	eststo clear

	* Destination FE
	
	eststo: reghdfe logbase exp exp_sqrd o_* migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid fe1=countryname yearofsalary ) vce(cluster countryname)

		bys countryname : egen z_c_col1 = max(fe1)
		drop fe1
		
		pwcorr z_c_col1 z_c_col1 if newCountry
		matrix corrMat = r(C)
		scalar corr_z1 = corrMat[1,2]
		
		local corrVal : di %4.2f corr_z1
		estadd loc corr_z "`corrVal'", replace 			

		egen tempval = sum((workercountries > 1) * e(sample))
		sum tempval if e(sample)
		local referenceAvg : di %8.0f r(mean) 
			estadd loc migrantsN `referenceAvg', replace 		
		drop tempval

	eststo: reghdfe logbase exp exp_sqrd o_* migrant_delta_gdppw_work_pos if valid_country_premia , absorb(fk_userid fe2=countryname yearofsalary ) vce(cluster countryname)

		bys countryname : egen z_c_col2 = max(fe2)
		drop fe2
		
		pwcorr z_c_col2 z_c_col1 if newCountry
		matrix corrMat = r(C)
		scalar corr_z2 = corrMat[1,2]

		local corrVal : di %4.2f corr_z2
		estadd loc corr_z "`corrVal'", replace 			

		egen tempval = sum((workercountries > 1) * e(sample))
		sum tempval if e(sample)
		local referenceAvg : di %8.0f r(mean) 
			estadd loc migrantsN `referenceAvg', replace 		
		drop tempval

	eststo: reghdfe logbase exp exp_sqrd o_* migrant_delta_gdppw_work ln_dist contig comlang_off sibling_ever col_dep_ever if valid_country_premia , absorb(fk_userid fe3=countryname yearofsalary ) vce(cluster countryname)

		estadd loc bilateralControls "$\checkmark$", replace 	

		bys countryname : egen z_c_col3 = max(fe3)
		drop fe3
		
		pwcorr z_c_col3 z_c_col1 if newCountry
		matrix corrMat = r(C)
		scalar corr_z3 = corrMat[1,2]

		local corrVal : di %4.2f corr_z3
		estadd loc corr_z "`corrVal'", replace 			

		egen tempval = sum((workercountries > 1) * e(sample))
		sum tempval if e(sample)
		local referenceAvg : di %8.0f r(mean) 
			estadd loc migrantsN `referenceAvg', replace 		
		drop tempval
		
	drop o_*
	* Origin FE

	tab coarseOrigin if valid_country_premia == 1, gen(o_)
	foreach my_var of varlist o_*{
		replace `my_var' = 0 if ~(migrated | already_migrated >= 1)
	}	

	eststo: reghdfe logbase exp exp_sqrd o_* migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid fe4=countryname yearofsalary ) vce(cluster countryname)

		bys countryname : egen z_c_col4 = max(fe4)
		drop fe4
		
		pwcorr z_c_col4 z_c_col1 if newCountry
		matrix corrMat = r(C)
		scalar corr_z4 = corrMat[1,2]

		local corrVal : di %4.2f corr_z4
		estadd loc corr_z "`corrVal'", replace 			

		egen tempval = sum((workercountries > 1) * e(sample))
		sum tempval if e(sample)
		local referenceAvg : di %8.0f r(mean) 
			estadd loc migrantsN `referenceAvg', replace 		
		drop tempval

	eststo: reghdfe logbase exp exp_sqrd o_*  migrant_delta_gdppw_work_pos if valid_country_premia , absorb(fk_userid fe5=countryname yearofsalary ) vce(cluster countryname)

		bys countryname : egen z_c_col5 = max(fe5)
		drop fe5
		
		pwcorr z_c_col5 z_c_col1 if newCountry
		matrix corrMat = r(C)
		scalar corr_z5 = corrMat[1,2]

		local corrVal : di %4.2f corr_z5
		estadd loc corr_z "`corrVal'", replace 			

		egen tempval = sum((workercountries > 1) * e(sample))
		sum tempval if e(sample)
		local referenceAvg : di %8.0f r(mean) 
			estadd loc migrantsN `referenceAvg', replace 		
		drop tempval
	
	eststo: reghdfe logbase exp exp_sqrd o_* migrant_delta_gdppw_work ln_dist contig comlang_off sibling_ever col_dep_ever if valid_country_premia , absorb(fk_userid fe6=countryname yearofsalary ) vce(cluster countryname)

		estadd loc bilateralControls "$\checkmark$", replace 	

		bys countryname : egen z_c_col6 = max(fe6)
		drop fe6
		
		pwcorr z_c_col6 z_c_col1 if newCountry
		matrix corrMat = r(C)
		scalar corr_z6 = corrMat[1,2]

		local corrVal : di %4.2f corr_z6
		estadd loc corr_z "`corrVal'", replace 			

		egen tempval = sum((workercountries > 1) * e(sample))
		sum tempval if e(sample)
		local referenceAvg : di %8.0f r(mean) 
			estadd loc migrantsN `referenceAvg', replace 		
		drop tempval

	label var o_1 "Other Africa"
	label var o_2 "Other Asia"
	label var o_3 "Australia"
	label var o_4 "Canada"
	label var o_5 "Other Europe"
	label var o_6 "France"
	label var o_7 "Germany"
	label var o_8 "India"
	label var o_9 "Ireland"
	label var o_10 "Netherlands"
	label var o_11 "Other North America"
	label var o_12 "Other Oceania"
	label var o_13 "Singapore"
	label var o_14 "Other South America"
	label var o_15 "Spain"
	label var o_16 "Sweden"
	label var o_17 "United Kingdom"
	label var o_18 "United States"
	
	label var migrant_delta_gdppw_work "Absolute difference in log(gdppw) between origin and destination"
	label var migrant_delta_gdppw_work_pos "Increase in log(gdppw) between origin and destination"

	* Store information for plotting
	local texFile = "`tablePath'/Table_B1.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles nonum
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) 
		stats(bilateralControls N migrantsN corr_z r2, 
						fmt(%-18s %9.0g %9.0g %4.2f %3.2f) 
						labels("Gravity controls" 
							   "Observations" 
							   "Observations for migrants" 
							   "Correlation with baseline z" 
							   "R$^2$")) 
		drop(_cons exp exp_sqrd ln_dist contig comlang_off sibling_ever col_dep_ever )
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
						& \multicolumn{3}{c}{\shortstack{Destinations}} 
						& \multicolumn{3}{c}{\shortstack{Origins}} \\
						\cmidrule(l){2-4} \cmidrule(l){5-7}  
		)
		posthead(\noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline 
			\end{tabular}
		)
	; 
	#delimit cr
	*/

	drop o_* newCountry z_c_col*

********************************************************
* ESTIMATE COUNTRY FE USING DUMMIES
********************************************************

	xi i.countryname , noomit
	
	* Make relative to United States (Make sure dropped country is U.S.)
	drop _Icountryna_99
	
	*------------------------------
	* Cluster by country of work
	*------------------------------

	reghdfe logbase exp exp_sqrd _I* c_* migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid yearofsalary ) vce(cluster countryname)

	matrix coefMat = e(b)
	matrix varMat = e(V)

	generate coef_z_c = .
	generate sd_z_c = .

	foreach my_var of varlist _I*{
		
		replace coef_z_c = _b[`my_var'] if `my_var' == 1 & _b[`my_var'] != 0
		
	}

	foreach my_var of varlist _I*{
		
		replace sd_z_c = sqrt(varMat["`my_var'","`my_var'"]) if `my_var' == 1 & _b[`my_var'] != 0
		
	}

	replace coef_z_c = 0 if countryname == "United States" & e(sample)
	replace sd_z_c = 0 if countryname == "United States" & e(sample)

	* Level adjust to the two z_c models line up

	summarize fe_country if countryname == "United States"
	scalar coef_avg = r(mean)

	replace coef_z_c = coef_z_c + coef_avg

	* Generate 95% confidence intervals

	generate lower_z_c = coef_z_c - (1.96 * sd_z_c)
	generate upper_z_c = coef_z_c + (1.96 * sd_z_c)

	* Plot 95% confidence intervals

	preserve

		keep if coef_z_c != .
		
		bys countryname : keep if _n == 1
		
		sort coef_z_c
		
		list countryname coef_z_c
		
		generate row = _n
		
		twoway (rcap lower_z_c upper_z_c row, horizontal) ///
			   (scatter row coef_z_c , mcolor(gs2) legend(off) scale(0.5) xscale(range(-0.8(0.1)0.6)) xlabel(-0.8(0.1)0.6) ) , ///
			   ytitle("") xtitle("z_c") ylabel(, nogrid) ///
			   ylabel(1 "Nigeria" 2 "India" 3 "Greece" 4 "Pakistan" 5 "Bangladesh" /// 
					  6 "Sri Lanka" 7 "Iran" 8 "Brazil" 9 "Egypt" 10 "Romania" ///
					  11 "Portugal" 12 "Philippines" 13 "Belgium" 14 "Argentina" 15 "Chile" ///
					  16 "Italy" 17 "Sweden" 18 "Hungary" 19 "Russia" 20 "Poland" ///
					  21 "China" 22 "Mexico" 23 "Israel" 24 "New Zealand" 25 "Spain" ///
					  26 "France" 27 "Japan" 28 "Canada" 29 "Colombia" 30 "Czech Republic" ///
					  31 "Malaysia" 32 "Australia" 33 "Ireland" 34 "Denmark" 35 "United Kingdom" ///
					  36 "Turkey" 37 "Switzerland" 38 "Netherlands" 39 "South Korea" 40 "Germany" ///
					  41 "Hong Kong" 42 "United States" 43 "Thailand" 44 "Qatar" 45 "Singapore" ///
					  46 "United Arab Emirates" 47 "South Africa" 48 "Saudi Arabia" ///
					, angle(0) noticks)
		graph export "`figurePath'/Figure_B1.eps" , replace

	restore

	*------------------------------
	* Cluster by country of work and school 
	*------------------------------
	
	reghdfe logbase exp exp_sqrd _I* c_* migrant_delta_gdppw_work if valid_country_premia , absorb(fk_userid yearofsalary ) vce(cluster countryname school)

	matrix coefMat = e(b)
	matrix varMat = e(V)

	generate coef_z_c_alt = .
	generate sd_z_c_alt = .

	foreach my_var of varlist _I*{
		
		replace coef_z_c_alt = _b[`my_var'] if `my_var' == 1 & _b[`my_var'] != 0
		
	}

	foreach my_var of varlist _I*{
		
		replace sd_z_c_alt = sqrt(varMat["`my_var'","`my_var'"]) if `my_var' == 1 & _b[`my_var'] != 0
		
	}

	replace coef_z_c_alt = 0 if countryname == "United States" & e(sample)
	replace sd_z_c_alt = 0 if countryname == "United States" & e(sample)

	* Level adjust to the two z_c models line up

	summarize fe_country if countryname == "United States"
	scalar coef_avg = r(mean)

	replace coef_z_c_alt = coef_z_c_alt + coef_avg

	*------------------------------
	* Compare the z_c standard errors
	*------------------------------

	preserve
	
		keep if sd_z_c != .
		
		bys countryname: keep if _n == 1
		
		generate ratio_z_c_sd = sd_z_c / sd_z_c_alt
		
		summarize ratio_z_c_sd , detail
	
	restore
	
	*------------------------------
	* Clean up
	*------------------------------
	
	drop coef_z_c sd_z_c upper_z_c lower_z_c

	drop coef_z_c_alt sd_z_c_alt

	drop fe_country

	drop _I*

********************************************************
* ESTIMATE Z_C USING DUMMIES - STEM VS NONSTEM
********************************************************

xi i.countryname , noomit

* Make relative to United States (Make sure dropped country is U.S.)
drop _Icountryna_99

* STEM

reghdfe logbase exp exp_sqrd _I* c_* migrant_delta_gdppw_work if valid_country_premia & majorStem == "Stem", absorb(fk_userid yearofsalary ) vce(cluster countryname)

matrix coefMat = e(b)
matrix varMat = e(V)

generate coef_z_c_stem = .

foreach my_var of varlist _I*{
	
	replace coef_z_c_stem = _b[`my_var'] if `my_var' == 1 & _b[`my_var'] != 0
	
}

replace coef_z_c_stem = 0 if countryname == "United States" 

* NON-STEM

reghdfe logbase exp exp_sqrd _I* c_* migrant_delta_gdppw_work if valid_country_premia & majorStem == "NonStem", absorb(fk_userid yearofsalary ) vce(cluster countryname)

matrix coefMat = e(b)
matrix varMat = e(V)

generate coef_z_c_nonstem = .

foreach my_var of varlist _I*{
	
	replace coef_z_c_nonstem = _b[`my_var'] if `my_var' == 1 & _b[`my_var'] != 0
	
}

replace coef_z_c_nonstem = 0 if countryname == "United States" 

preserve

	keep if coef_z_c_stem != . & coef_z_c_nonstem != .

	bys countryname : gen newCountry = _n == 1

	keep if newCountry == 1
	
	list countryname coef_z_c_*
	
	twoway line coef_z_c_stem coef_z_c_stem ||  ///
		scatter coef_z_c_nonstem coef_z_c_stem , mlabel(work_country_iso) ms(i)  mlabcolor(black) mlabsize(6.5pt) ///
		legend(off) ytitle("price per unit of non-STEM labor") xtitle("price per unit of STEM labor") ///
		yscale(range(-.75 0.5)) ylabel(-0.75(0.25)0.5) 
	graph export "`figurePath'/Figure_B3.eps" , replace
		
restore

********************************************************
* STEP 1B: KEEP ONLY EDUCATION SAMPLE NOW THAT MOVERS HAVE BEEN USED
********************************************************

	keep if valid_educ & hasCountryPremia & school != ""

	generate c_cprime = universitycountry + "-" + countryname

	********************************************************
	* Determine step 2 sample thresholds 
	********************************************************

	bys school: egen schoolObs = sum(hasCountryPremia & valid_educ )

	bys school: egen schoolObsLocal = sum(hasCountryPremia & valid_educ & home_country)

	generate valid_school = schoolObsLocal >= school_thresh 

	********************************************************
	* Determine step 2 sample thresholds by major
	********************************************************

	foreach mjr in "Stem" "NonStem" {

		disp "Estimates for `mjr'"

		bys school: egen schoolObs_`mjr' = sum(hasCountryPremia & valid_educ & majorStem == "`mjr'")

		bys school: egen schoolObsLocal_`mjr' = sum(hasCountryPremia & valid_educ & home_country & majorStem == "`mjr'")
				
		generate valid_school_`mjr' = schoolObsLocal_`mjr' >= school_thresh 

	}
		
	********************************************************
	* ESTIMATE SCHOOL FE CONTROLLING FOR Z_C 
	********************************************************

	reghdfe logbase_minus_z_c exp exp_sqrd if valid_educ & valid_school & home_country & universitycountry != "" , absorb(fe_school=school yearofsalary)

	* Create sample information from regression
	generate sampleStep2 = e(sample) 
	bys school : egen obs_step2 = sum(sampleStep2)

	* Fill in information
	bys school : egen q_j = max(fe_school)

	drop fe_school sampleStep2

	********************************************************
	* ESTIMATE SELECTION 
	********************************************************
	
	* Keep only countries of study with at least threshold emigrants
	reghdfe logbase_minus_z_c exp exp_sqrd if valid_educ & valid_school & universitycountry != "" , absorb(school yearofsalary)
	generate currSample = e(sample) 

	* Determine total emigrants and immigrants by country 
	bys universitycountry : egen universityCountryEmigrants = sum(currSample * (universitycountry != countryname))
	bys countryname : egen workCountryImmigrants = sum(currSample * (universitycountry != countryname))
	generate validSelection = universityCountryEmigrants >= selection_thresh & workCountryImmigrants >= selection_thresh
	
	* Restrict selection to sample only of countries with many emigrants and immigrants
	reghdfe logbase_minus_z_c exp exp_sqrd if valid_educ & valid_school & validSelection & universitycountry != "" , absorb(school yearofsalary)
	generate sampleStep2 = e(sample) 

	bys universitycountry countryname : egen mean_resid_h_c_cprime = mean(logbase_minus_z_c - q_j)
	
	* Determine average h for domestic employed
	generate same_country = mean_resid_h_c_cprime if universitycountry == countryname
	bys universitycountry : egen mean_h_resid_same_country = max(same_country)
	
	* Determine diff in h relative to domestic employed
	generate a_c_cprime = mean_resid_h_c_cprime - mean_h_resid_same_country if validSelection 
	
	* Determine migration shares by school
	bys universitycountry countryname : egen obs_a_c_cprime = sum(sampleStep2)
	bys universitycountry countryname school : egen obs_j_c_cprime = sum(sampleStep2)
	
	drop currSample sampleStep2 same_country mean_resid_h_c_cprime mean_h_resid_same_country
	
	********************************************************
	* MAKE ALL Q_J RELATIVE TO UT AUSTIN
	********************************************************

	foreach my_var of varlist q_j*{
		
		generate utaustin = `my_var' if school == "The University of Texas at Austin"
		
		egen std_utaustin = max(utaustin)
		
		summarize std_utaustin, detail
		scalar std_`my_var' = r(mean)
		
		replace `my_var' = `my_var' - std_utaustin
		
		drop utaustin std_utaustin
	}

	********************************************************
	* 3: Estimate country-major premia together
	********************************************************

		generate uni_country = university_country_iso + "-" + school 
		generate uni_country_major = university_country_iso + "-" + school + "-" + majorStem

		generate validMajor = majorStem == "Stem" | majorStem == "NonStem" 
		
		generate validSchoolMajor = 0
		replace validSchoolMajor  = 1 if valid_school_Stem & majorStem == "Stem" 
		replace validSchoolMajor  = 1 if valid_school_NonStem  & majorStem == "NonStem" 
		
		* COUNTRY-MAJOR TOGETHER
		
		reghdfe logbase_minus_z_c exp exp_sqrd if valid_educ & validSchoolMajor & universitycountry != "" & validMajor & home_country , absorb(unicountry_specific_major=uni_country_major yearofsalary)

		bys uni_country majorStem: egen q_j_combmajor = max(unicountry_specific_major)

		drop validMajor validSchoolMajor 
		
		* Split by major
		
		foreach mjr in "Stem" "NonStem"{

			disp "Estimates for `mjr'"

			generate q_j_temp = q_j_combmajor if majorStem == "`mjr'" 

			bys uni_country : egen q_j_comb_`mjr' = max(q_j_temp)

			drop q_j_temp
		}

	*/
	
********************************************************
* Summarize countries with q_j estimated
********************************************************

preserve

	keep if q_j != .
	
	bys universitycountry : keep if _n == 1
	
	summarize log_gdppw_university_country , detail

	sort log_gdppw_university_country 
	
	generate gdppwuc = exp(log_gdppw_university_country)

	list universitycountry log_gdppw_university_country gdppwuc

restore

********************************************************
* OUTPUT ESTIMATES FOR Q_J
********************************************************

preserve

	keep if q_j != .
	
	keep school university_country_iso world_rank q_j obs_step2
	
	order school university_country_iso world_rank q_j obs_step2
	
	bys school : keep if _n == 1
	
	generate sort_qj = -1 * q_j	
	sort sort_qj
	drop sort_qj

	outsheet using "`estimatePath'\School_q_j_domestic_only.csv" , comma replace

restore

********************************************************
* OUTPUT Q_J FOR PUBLIC RANKING
********************************************************

preserve

	keep if q_j != .
	
	keep school university_country_iso q_j obs_step2
	
	bys school : keep if _n == 1
	
	generate sort_qj = -1 * q_j	
	sort sort_qj
	drop sort_qj
	
	generate ranking = _n
	
	order ranking school university_country_iso q_j obs_step2
	
	rename school college
	rename university_country_iso country
	rename q_j average_human_capital
	rename obs_step2 sample_size
	
	outsheet using "`estimatePath'\College_rankings_average_human_capital.csv" , comma replace

restore
	
********************************************************
* OUTPUT ESTIMATES FOR Q_J BY STEM/NON-STEM WHEN COMBINED
********************************************************

preserve

	keep if q_j != .
	
	bys school : keep if _n == 1
	
	generate q_j_Stem_std = q_j_comb_Stem - std_q_j
	generate q_j_NonStem_std = q_j_comb_NonStem - std_q_j

	* Overall ranking
	
	generate sort_overall = -1 * q_j 
	sort sort_overall
	drop sort_overall
	
	generate rank_overall = _n
	replace rank_overall = . if q_j == .

	* Stem ranking
	
	generate sort_stem = -1 * q_j_Stem_std	
	sort sort_stem
	drop sort_stem
	
	generate rank_stem = _n
	replace rank_stem = . if q_j_Stem_std == .

	* NonStem ranking
	
	generate sort_nonstem = -1 * q_j_NonStem_std	
	sort sort_nonstem
	drop sort_nonstem
	
	generate rank_nonstem = _n
	replace rank_nonstem = . if q_j_NonStem_std == .
	
	* Output rankings
	
	keep school rank_overall q_j rank_stem q_j_Stem_std rank_nonstem q_j_NonStem_std world_rank
	
	order rank_overall rank_stem rank_nonstem school q_j q_j_Stem_std q_j_NonStem_std world_rank
	
	sort rank_overall
	
	outsheet using "`estimatePath'\School_ranks_overall_stem_nonstem.csv" , comma replace
	
restore

********************************************************
* OUTPUT ESTIMATES FOR A_C_CPRIME
********************************************************

preserve

	keep if a_c_cprime != .
	
	keep university_country_iso work_country_iso a_c_cprime 
	
	order university_country_iso work_country_iso a_c_cprime 
	
	bys university_country_iso work_country_iso : generate selectionObs = _N 

	bys university_country_iso work_country_iso : keep if _n == 1
	
	drop if university_country_iso == work_country_iso
	
	generate sort_a_c_cprime = -1 * a_c_cprime
	
	sort sort_a_c_cprime
	
	drop sort_a_c_cprime

	outsheet using "`estimatePath'\Selection_a_c_cprime.csv" , comma replace

restore

********************************************************
* Create summary information for each country of study
********************************************************

preserve

	keep if q_j != .
	
	bys school: generate newUniversity = _n == 1
	
	generate worksHome = universitycountry == countryname
	
	generate worksAbroad = universitycountry != countryname

	* Calculate totals
	
	generate top5pct = national_rank_pct <= 0.05
	
	bys universitycountry : egen totalUniversities = sum(newUniversity)
	
	bys universitycountry : egen totalUniversitiesTop5pct = sum(newUniversity & top5pct)
	
	bys universitycountry : egen totalHome = sum(worksHome)
	
	bys universitycountry : egen totalAbroad = sum(worksAbroad)
	
	generate gdppw_university_country = exp(log_gdppw_university_country)
	
	* One row per county
	
	bys universitycountry : keep if _n == 1
	
	keep universitycountry university_country_iso gdppw_university_country totalUniversities totalUniversitiesTop5pct totalHome totalAbroad
	
	order universitycountry university_country_iso gdppw_university_country totalUniversities totalUniversitiesTop5pct totalHome totalAbroad

	sort gdppw_university_country
	
	outsheet using "`estimatePath'\University_countries_summary.csv" , comma replace
	
restore

********************************************************
* Determine university coverage by country
********************************************************

preserve

	keep if q_j != .

	bys universitycountry school : keep if _n == 1

	by universitycountry: gen totaluniversities = _N

	by universitycountry: egen totaluniversities_top5 = sum(national_rank_pct <= 0.05 & national_rank_pct != .)
	
	generate numberuniversities_top5 = floor(numberuniversities * 0.05)
	
	generate pct_universities = totaluniversities / numberuniversities

	generate pct_universities_top5 = totaluniversities_top5 / numberuniversities_top5

	order universitycountry totaluniversities numberuniversities  pct_universities totaluniversities_top5 numberuniversities_top5 pct_universities_top5

	keep universitycountry totaluniversities numberuniversities  pct_universities totaluniversities_top5 numberuniversities_top5 pct_universities_top5

	bys universitycountry: keep if _n == 1
	
	* Add gdppw  to sort on
	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
	keep if _merge == 3
	drop _merge
	
	drop country_glassdoor iso
	
	order universitycountry log_gdppw_uc
	
	sort log_gdppw_uc
	
	outsheet using "`estimatePath'\Country_university_coverage.csv" , comma replace
	
restore

********************************************************
* Look at relation between country overall and country-major estimates within both
********************************************************

preserve

	keep if q_j != .

	bys universitycountry school: keep if _n == 1
	
	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
	
	bys universitycountry : generate newUniversityCountry = _n == 1
	
	* Average university premia for top percent schools in each country
	foreach topUniversityThresh of numlist 5 {
	
		generate pct_thresh = `topUniversityThresh' / 100
		
		* STEM q_j
		
		bys universitycountry : egen avg_q_j = mean(q_j_comb_Stem) if q_j_comb_Stem != . & national_rank_pct <= pct_thresh & national_rank != .
		bys universitycountry : egen avg_stem_`topUniversityThresh' = max(avg_q_j)
		drop avg_q_j 
		
		* NONSTEM q_j
		
		bys universitycountry : egen avg_q_j = mean(q_j_comb_NonStem) if q_j_comb_NonStem != . & national_rank_pct <= pct_thresh & national_rank != .
		bys universitycountry : egen avg_nonstem_`topUniversityThresh' = max(avg_q_j)
		
		drop avg_q_j 
		
		drop pct_thresh
		
	} 
		
	*--------------------------------------------------
	* Differences
	*--------------------------------------------------
	
	generate stem_minus_nonstem = avg_stem_5 - avg_nonstem_5

	keep if stem_minus_nonstem != . 
	
	twoway scatter stem_minus_nonstem log_gdppw_uc if newUniversityCountry , mlabel(university_country_iso) ms(i) mlabcolor(black) mlabsize(6.5pt) xscale(range(9.375 12.075)) ///
		yscale(range(-.1 0.5)) ylabel(-0.1(0.1)0.5) ///
	legend(off) ytitle("top colleges stem - top colleges non-stem") xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000")  ///
	yline(0, lcolor(black) lwidth(0.1))
	graph export "`figurePath'/Figure_B2.eps" , replace
	
restore

********************************************************
* Look at patents and university quality
********************************************************

preserve

	keep if q_j != .

	bys universitycountry school: keep if _n == 1
	
	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
	drop if _merge == 2
	drop _merge
	
	merge m:1 university_country_iso using "`tempPath'/Country_patents.dta"
	drop if _merge == 2
	drop _merge
		
	*--------------------------------------------------
	* Average university premia for top 10% schools in each country
	*--------------------------------------------------
	
	foreach topUniversityThresh of numlist 2 5 10 25 {
		
		disp `topUniversityThresh'
	
		generate pct_thresh = `topUniversityThresh' / 100
		
		bys universitycountry : egen avg_q_j_`topUniversityThresh' = mean(q_j) if national_rank_pct <= pct_thresh & national_rank != .

		bys universitycountry : egen avg_`topUniversityThresh' = max(avg_q_j_`topUniversityThresh')
		
		drop pct_thresh
	} 
	
	bys university_country_iso : generate newUniversityCountry = _n == 1
			
	*--------------------------------------------------
	* Scatters of q_j and patents per capita
	*--------------------------------------------------
	
	generate patentspc_thous = patentspc * 1000
	
	eststo clear
	
	eststo: reg patentspc_thous log_gdppw_uc if newUniversityCountry 

		sum patentspc_thous if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc avg `referenceMean', replace 		

	foreach my_var of varlist avg_2 avg_5 avg_10 avg_25 {
		
		generate regVar = `my_var'
		
		eststo: reg patentspc_thous regVar log_gdppw_uc if newUniversityCountry 
	    
		sum patentspc_thous if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc avg `referenceMean', replace 		

		drop regVar
		
	}

	generate regVar = 1
	label var regVar "College graduate quality"
	label var log_gdppw_uc "Log(gdppw)"
	
	* Store information for plotting
	local texFile = "`tablePath'/Table_B7.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(avg N r2, 
						fmt(%4.3 %9.0g %3.2f) 
						labels("Mean patents per thousands of persons"
							   "N" 
							   "R$^2$")) 
		drop(_cons )
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
					& 
					& \multicolumn{4}{c}{Incorporating college graduate quality} \\
					\cmidrule(l){3-6} 
					& 
					& \multicolumn{1}{c}{Top 2\%} 
					& \multicolumn{1}{c}{Top 5\%} 
					& \multicolumn{1}{c}{Top 10\%} 
					& \multicolumn{1}{c}{Top 25\%}  \\ \hline
		)
		posthead(\noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr
	
restore


********************************************************
* Look at within patent differences by university quality -- OLS
********************************************************

preserve

	keep if q_j != .

	* CREATE SAMPLE WEIGHTS FOR OBSERVATIONS FOR MULTIPLE SCHOOLS

	bys mergepatentschool: gen totalObs = _N 

	bys mergepatentschool school: gen schoolPatentObs = _N 
	
	generate sharePatentSample = schoolPatentObs / totalObs
	
	* SETUP FOR REGRESSION SAMPLE

	bys universitycountry school: keep if _n == 1
	
	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
	drop if _merge == 2
	drop _merge

	* REGRESSION OF OUTCOMES ON Q_J FOR MATCHED SAMPLE
	
	eststo clear

	eststo: reg inventor q_j if universitycountry == "United States" [aw=sharePatentSample]

		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		

		sum inventor if e(sample)
		local referenceMean : di %5.4f r(mean) 
		estadd loc ceoavg `referenceMean', replace 	
		
	eststo: reg top5cit q_j if universitycountry == "United States" [aw=sharePatentSample]
	
		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		

		sum top5cit if e(sample)
		local referenceMean : di %5.4f r(mean) 
		estadd loc ceoavg `referenceMean', replace 
		
	label var q_j "College graduate quality"
	
	* Store information for plotting
	local texFile = "`tablePath'/Table_7.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(ceoavg qjsd N, 
						fmt(%5.4f %3.2f %9.0g) 
						labels("Mean outcome" 
							   "Std. dev. college graduate quality" 
							   "N")) 
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
					& \multicolumn{1}{c}{\shortstack{Share\\inventors}} 
					& \multicolumn{1}{c}{\shortstack{Share inventors\\in top 5 percent\\of citations}}  \\ \hline
		)
		posthead(\noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr

restore

********************************************************
* Look at relation between q_j for top universities and gdppw - % TOP UNIVERSITY
********************************************************

preserve

	keep if q_j != .
	
	bys universitycountry school: keep if _n == 1

	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
	keep if _merge == 3
	drop _merge
	
	bys universitycountry : generate newUniversityCountry = _n == 1
	
	*--------------------------------------------------
	* Average university premia for top schools in each country
	*--------------------------------------------------
	
	foreach topUniversityThresh of numlist 2 5 10 25 {
		
		disp `topUniversityThresh'
	
		generate pct_thresh = `topUniversityThresh' / 100
		
		bys universitycountry : egen avg_q_j_`topUniversityThresh' = mean(q_j) if national_rank_pct <= pct_thresh & national_rank != .

		bys universitycountry : egen avg_`topUniversityThresh' = max(avg_q_j_`topUniversityThresh')
		
		drop pct_thresh
	} 
	
	* Average university premia for all schools in each country
	bys universitycountry : egen avg_q_j_all = mean(q_j) 
	bys universitycountry : egen avg_all = max(avg_q_j_all)
			
	* Plot scatter plot of z_c and gdp
	twoway lfitci avg_5 log_gdppw_uc if newUniversityCountry, clcolor(gs2) || ///
	scatter avg_5 log_gdppw_uc if newUniversityCountry , mlabel(iso) ms(i) mlabcolor(black) legend(off) mlabsize(6.5pt)  xscale(range(9.375 12.075)) ///
		yscale(range(-.62 0.62)) ylabel(-0.6(0.2)0.6) ///
		xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000") ytitle("college graduate quality") 
	graph export "`figurePath'/Figure_2a.eps" , replace
	
	*--------------------------------------------------
	* Regression table on different rankings and LOG GDPPW - with partitioned
	*--------------------------------------------------
	
	generate gdppw_uc = exp(log_gdppw_uc)
	
	eststo clear
	
	eststo: reg avg_2 log_gdppw_uc if newUniversityCountry 
		estadd loc wtd "", replace 					
		pwcorr avg_2 gdppw_uc 
		matrix corrMat = r(C)
		local corrVal : di %3.2f corrMat[1,2]
		estadd loc corrVal "`corrVal'", replace 					

	eststo: reg avg_5 log_gdppw_uc if newUniversityCountry 
		estadd loc wtd "", replace 			
		pwcorr avg_5 gdppw_uc 
		matrix corrMat = r(C)
		local corrVal : di %3.2f corrMat[1,2]
		estadd loc corrVal "`corrVal'", replace 			
					
		summarize log_gdppw_uc if e(sample)
		scalar gdppw_range = r(max) - r(min)
		scalar gdppw_range_logdiff = _b["log_gdppw_uc"] * gdppw_range
		scalar gdppw_range_ppdiff = 100 * (exp(gdppw_range_logdiff) - 1)
		
	eststo: reg avg_10 log_gdppw_uc if newUniversityCountry 
		estadd loc wtd "", replace 			
		pwcorr avg_10 gdppw_uc 
		matrix corrMat = r(C)
		local corrVal : di %3.2f corrMat[1,2]
		estadd loc corrVal "`corrVal'", replace 			

	eststo: reg avg_25 log_gdppw_uc if newUniversityCountry 
		estadd loc wtd "", replace 			
		pwcorr avg_25 gdppw_uc  
		matrix corrMat = r(C)
		local corrVal : di %3.2f corrMat[1,2]
		estadd loc corrVal "`corrVal'", replace 			

	eststo: reg avg_all log_gdppw_uc if newUniversityCountry 
		estadd loc wtd "", replace 			
		pwcorr avg_all gdppw_uc  
		matrix corrMat = r(C)
		local corrVal : di %3.2f corrMat[1,2]
		estadd loc corrVal "`corrVal'", replace 			
	
	label var log_gdppw_uc "Log(gdppw)"
	
	* Store information for plotting
	local texFile = "`tablePath'/Table_5.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(N r2_a corrVal, 
						fmt(%9.0g %3.2f %3.2f) 
						labels("Countries" 
							   "Adjusted R$^2$"
							   "Corr(gdppw,college graduate quality)")) 
		drop(_cons )
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
					& \multicolumn{1}{c}{Top 2\%} 
					& \multicolumn{1}{c}{Top 5\%} 
					& \multicolumn{1}{c}{Top 10\%} 
					& \multicolumn{1}{c}{Top 25\%} 
					& \multicolumn{1}{c}{All}  \\
		)
		posthead(\hline \noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr

restore

********************************************************
* Look at within country dispersion in quality  - variance of q_j
********************************************************

preserve

	keep if q_j != .
	
	bys universitycountry school: keep if _n == 1

	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "C:\Users\jsock\Dropbox\Research\GD\International/Data/Country_gdppw.dta"
	
	* Determine share universities we have for each country
	bys universitycountry : gen totalUni = _N
	
	generate uniShare = totalUni / numberuniversities
	
	bys universitycountry : egen var_qj = sd(q_j) 
		
	bys universitycountry : generate newUniversityCountry = _n == 1
	
	* Variance

	reg var_qj log_gdppw_uc if newUniversityCountry
	
	twoway lfitci var_qj log_gdppw_uc if newUniversityCountry, clcolor(gs2) || ///
	scatter var_qj log_gdppw_uc if newUniversityCountry , mlabel(iso) ms(i)  mlabcolor(black) legend(off) mlabsize(6.5pt) xscale(range(9.375 12.075))   ///
		xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000") 
	graph export "`figurePath'/Figure_3.eps" , replace
	
restore

********************************************************
* Look at relation between q_j for top universities and gdppw
********************************************************

preserve

	keep if q_j != .
	
	bys universitycountry school: keep if _n == 1

	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
	keep if _merge == 3
	drop _merge
			
	bys universitycountry : generate newUniversityCountry = _n == 1

	* Average university premia for top schools in each country
	foreach topUniversityThresh of numlist 5 {
		
		disp `topUniversityThresh'

		bys universitycountry : egen avg_q_j_`topUniversityThresh' = mean(q_j) if national_rank <= `topUniversityThresh' & national_rank != .

		bys universitycountry : egen avg_`topUniversityThresh' = max(avg_q_j_`topUniversityThresh')

	} 
	
	* Plot scatter plot of z_c and gdp
	twoway lfitci avg_5 log_gdppw_uc if newUniversityCountry, clcolor(gs2) || ///
	scatter avg_5 log_gdppw_uc if newUniversityCountry , mlabel(iso) ms(i) mlabsize(6.5pt)  mlabcolor(black) legend(off) xscale(range(9.375 12.075)) ///
		yscale(range(-.62 0.62)) ylabel(-0.6(0.2)0.6) ///
		xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000") ytitle("college graduate quality") 
	graph export "`figurePath'/Figure_2b.eps" , replace
	
restore

********************************************************
* Estimate relation between q_j and CWUR world ranking
********************************************************

preserve

	tab rankBin, gen(r_)
	drop r_8
	
	bys school: generate newUniversity = _n == 1

	eststo clear

	eststo: reg q_j r_* if newUniversity , vce(bootstrap)

		foreach myNum of numlist 1/7{
			tab r_`myNum' if e(sample), matcell(tabResults)
			local nextTotal = tabResults[2,1]
			estadd loc obs_`myNum' "`nextTotal'", replace 			
			disp `obs_`myNum''	    
		}

	label var r_1 "World rank: 1--20"
	label var r_2 "World rank: 21--50"
	label var r_3 "World rank: 51--100"
	label var r_4 "World rank: 101--250"
	label var r_5 "World rank: 251--500"
	label var r_6 "World rank: 501--1000"
	label var r_7 "World rank: 1001--2000"

	* Store information for plotting
	local texFile = "`tablePath'/Table_4_vertical.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(obs_1 obs_2 obs_3 obs_4 obs_5 obs_6 obs_7 N r2_a, 
						fmt(%9.0g %9.0g %9.0g %9.0g %9.0g %9.0g %9.0g %9.0g %3.2f) 
						labels("N: Rank 1--20" 
							   "N: Rank 21--50" 
							   "N: Rank 51--100" 
							   "N: Rank 101--250" 
							   "N: Rank 251--500" 
							   "N: Rank 501--1000" 
							   "N: Rank 1001--2000" 
							   "N: Total" 
							   "Adjusted R$^2$")) 
		drop(_cons )
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
					& \multicolumn{7}{c}{\shortstack{World ranking}} \\
					\cmidrule(l){2-8}  
					& \multicolumn{1}{c}{\shortstack{1--20}} 
					& \multicolumn{1}{c}{\shortstack{21--50}}
					& \multicolumn{1}{c}{\shortstack{51--100}}
					& \multicolumn{1}{c}{\shortstack{101--250}}
					& \multicolumn{1}{c}{\shortstack{251--500}} 
					& \multicolumn{1}{c}{\shortstack{501--1000}} 
					& \multicolumn{1}{c}{\shortstack{1001--2000}} \\
		)
		posthead(\hline \noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr
	
restore

********************************************************
* Estimate relation between log wage and CWUR world ranking
********************************************************

preserve

	tab rankBin, gen(r_)
	drop r_8
	
	eststo clear

	* FULL SAMPLE
	
	reghdfe logbase_minus_z_c exp exp_sqrd r_* if valid_educ & valid_school & home_country & universitycountry != "", absorb(yearofsalary) vce(cluster school)
	gen currSample = e(sample)

	bys school : egen totalSamp = sum(currSample)
	generate wt = 1 / totalSamp

	eststo: reghdfe logbase_minus_z_c exp exp_sqrd r_* if valid_educ & valid_school & home_country & universitycountry != "" [aw=wt], absorb(yearofsalary) vce(cluster school)
	
	drop currSample totalSamp wt
	
	* 0-2 YEARS EXP
	
	reghdfe logbase_minus_z_c exp exp_sqrd r_* if inrange(exp,0,2) & valid_educ & valid_school & home_country & universitycountry != "", absorb(yearofsalary) vce(cluster school)
	gen currSample = e(sample)

	bys school : egen totalSamp = sum(currSample)
	generate wt = 1 / totalSamp

	eststo: reghdfe logbase_minus_z_c exp exp_sqrd r_* if inrange(exp,0,2) & valid_educ & valid_school & home_country & universitycountry != "" [aw=wt], absorb(yearofsalary) vce(cluster school)

	drop currSample totalSamp wt
	
	* 3-9 YEARS EXP
	
	reghdfe logbase_minus_z_c exp exp_sqrd r_* if inrange(exp,3,9) & valid_educ & valid_school & home_country & universitycountry != "", absorb(yearofsalary) vce(cluster school)
	gen currSample = e(sample)

	bys school : egen totalSamp = sum(currSample)
	generate wt = 1 / totalSamp
	
	eststo: reghdfe logbase_minus_z_c exp exp_sqrd r_* if inrange(exp,3,9) & valid_educ & valid_school & home_country & universitycountry != "" [aw=wt], absorb(yearofsalary) vce(cluster school)

	drop currSample totalSamp wt
	
	* 10+ YEARS EXP
	
	reghdfe logbase_minus_z_c exp exp_sqrd r_* if inrange(exp,10,50) & valid_educ & valid_school & home_country & universitycountry != "", absorb(yearofsalary) vce(cluster school)
	gen currSample = e(sample)

	bys school : egen totalSamp = sum(currSample)
	generate wt = 1 / totalSamp
	
	eststo: reghdfe logbase_minus_z_c exp exp_sqrd r_* if inrange(exp,10,50) & valid_educ & valid_school & home_country & universitycountry != "" [aw=wt], absorb(yearofsalary) vce(cluster school)
		
	drop currSample totalSamp wt
	
	label var r_1 "World rank: 1--20"
	label var r_2 "World rank: 21--50"
	label var r_3 "World rank: 51--100"
	label var r_4 "World rank: 101--250"
	label var r_5 "World rank: 251--500"
	label var r_6 "World rank: 501--1000"
	label var r_7 "World rank: 1001--2000"

	* Store information for plotting
	local texFile = "`tablePath'/Table_B3.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(N , 
						fmt(%9.0g) 
						labels("N")) 
		drop(_cons exp exp_sqrd)
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
					& & \multicolumn{3}{c}{\shortstack{Years of experience}} \\
					\cmidrule(l){3-5}  
					& \multicolumn{1}{c}{\shortstack{All}} 
					& \multicolumn{1}{c}{\shortstack{0--2}}
					& \multicolumn{1}{c}{\shortstack{3--9}}
					& \multicolumn{1}{c}{\shortstack{10+}} \\
		)
		posthead(\hline \noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr
	
restore
 
********************************************************
* Distribution of wages by CWUR world ranking
********************************************************

preserve

	tab rankBin, gen(r_)
	
	keep if inrange(logbase_minus_z_c,9,13)
	
	twoway kdensity logbase_minus_z_c if r_1 == 1 || ///
		kdensity logbase_minus_z_c if r_2 == 1 | r_3 == 1 || ///
		kdensity logbase_minus_z_c if r_4 == 1 | r_5 == 1 || ///
		kdensity logbase_minus_z_c if r_6 == 1 | r_7 == 1  || ///
		kdensity logbase_minus_z_c if r_8 == 1 , ///
			legend(position(0) bplacement(neast) order(1 "1-20" 2 "21-100" 3 "101-500" 4 "501-2000" 5 "Unranked") rows(5)) ytitle("density") xtitle("log human capital") scale(1.1) xlabel(9(0.5)13)  
	graph export "`figurePath'/Figure_B4.eps"	, replace

restore

********************************************************
* Look at notable achievements by university quality
********************************************************

preserve

	keep if q_j != .

	bys universitycountry school: keep if _n == 1
	
	generate country_glassdoor = universitycountry
	
	merge m:1 country_glassdoor using "`tempPath'/Country_gdppw.dta"
	drop if _merge == 2
	drop _merge

	merge m:1 university_country_iso school using "`tempPath'/University_nobel_prizes.dta"
	drop if _merge == 2
	drop _merge
	
	merge m:1 university_country_iso school using "`tempPath'/University_ceos_2021.dta"
	drop if _merge == 2
	drop _merge
	
	replace university_ceos = 0 if university_ceos == .
	replace nobel_prizes = 0 if nobel_prizes == .
	
	bys universitycountry school: gen newUni = _n == 1
	
	*--------------------------------------------------
	* Regression school q_j
	*--------------------------------------------------
	
	eststo clear	

	tobit nobel_prizes q_j if newUni & universitycountry == "United States", ll(0)
	eststo: margins, dydx(q_j) predict(ystar(0,.)) post
			estadd loc countryFE "", replace 	 			

		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		
	
		sum nobel_prizes if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc newavg `referenceMean', replace 		

	xi: tobit nobel_prizes q_j i.university_country_iso if newUni & universitycountry != "United States", ll(0)
	eststo: margins, dydx(q_j) predict(ystar(0,.)) post
			estadd loc countryFE "$\checkmark$", replace 	
			
		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		
	
		sum nobel_prizes if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc newavg `referenceMean', replace 	

	tobit university_ceos q_j if newUni & universitycountry == "United States", ll(0)
	eststo: margins, dydx(q_j) predict(ystar(0,.)) post
			estadd loc countryFE "", replace 	 			

		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		

		sum university_ceos if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc newavg `referenceMean', replace 		

	xi: tobit university_ceos q_j i.university_country_iso if newUni & universitycountry != "United States", ll(0)
	eststo: margins, dydx(q_j) predict(ystar(0,.)) post
			estadd loc countryFE "$\checkmark$", replace 	
		
		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		
	
		sum university_ceos if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc newavg `referenceMean', replace 		

	label var q_j "College graduate quality"
	
	* Store information for plotting
	local texFile = "`tablePath'/Table_8.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(countryFE newavg qjsd N , 
						fmt(%-18s %4.3f %3.2f %9.0g) 
						labels("Country FE" 
							   "Mean outcome" 
							   "Std. dev. college graduate quality" 
							   "N" )) 
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
					& \multicolumn{2}{c}{\shortstack{Number of\\Nobel laureates}} 
					& \multicolumn{2}{c}{\shortstack{Number of CEOs\\of S\&P 500 firms}} \\ 
					\cmidrule(l){2-3} \cmidrule(l){4-5}
						& \multicolumn{1}{c}{\shortstack{U.S.\\colleges}} 
						& \multicolumn{1}{c}{\shortstack{Non-U.S.\\colleges}} 
						& \multicolumn{1}{c}{\shortstack{U.S.\\colleges}} 
						& \multicolumn{1}{c}{\shortstack{Non-U.S.\\colleges}} \\ \hline
		)
		posthead(\noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr
	
restore


********************************************************
* Founder/csuite variables from workers' resumes
********************************************************

*--------------------------------------------------
* Merge from resume work experience
*--------------------------------------------------
	
preserve

	clear
	
	insheet using "`dataPath'/User_founder_csuite.csv" , comma
	
	tempfile in_results
	save `in_results', emptyok
	
restore

merge m:1 fk_userid using `in_results'
drop if _merge == 2
drop _merge

*--------------------------------------------------
* Regress school quality on being a founder
*--------------------------------------------------

preserve

	keep if q_j != .

	keep if founder != . & csuite != .
	
	bys fk_userid: keep if _n == 1
	
	keep if birthyear != .
	
	keep if inrange(birthyear,1955,1999)
	
	tab birthyear, gen(b_)
	
	eststo clear
	
	* CREATE CONSISTENT SAMPLES
	
	logit founder b_* q_j if university_country_iso == "USA"
	generate sample1 = e(sample)
	
	logit csuite b_* q_j if university_country_iso == "USA"
	generate sample2 = e(sample)
	
	generate sample_US = sample1 == 1 & sample2 == 1
	drop sample1 sample2 
	
	xi: logit founder i.university_country_iso b_* q_j if university_country_iso != "USA"
	generate sample1 = e(sample)
	
	xi: logit csuite i.university_country_iso b_* q_j if university_country_iso != "USA"
	generate sample2 = e(sample)
	
	generate sample_nonUS = sample1 == 1 & sample2 == 1
	drop sample1 sample2 
	
	* WHETHER PERSON IS FOUNDER
	
	logit founder b_* q_j if sample_US, vce(cluster school)
	eststo: margins, dydx(q_j) post vce(unconditional)
		estadd loc countryFE "", replace 	 			

		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		

		sum founder if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc avg `referenceMean', replace 
		
		sum birthyear if e(sample)
		local referenceMean : di %4.0f r(mean) 
		estadd loc avgyear `referenceMean', replace 
		
	xi: logit founder i.university_country_iso b_* q_j if sample_nonUS, vce(cluster school)
	eststo: margins, dydx(q_j) post vce(unconditional)
		estadd loc countryFE "$\checkmark$", replace 	 			

		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		

		sum founder if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc avg `referenceMean', replace 
		
		sum birthyear if e(sample)
		local referenceMean : di %4.0f r(mean) 
		estadd loc avgyear `referenceMean', replace 
		
	* WHETHER PERSON IS C-SUITE
	
	logit csuite b_* q_j if sample_US, vce(cluster school)
	eststo: margins, dydx(q_j) post vce(unconditional)
		estadd loc countryFE "", replace 	 			

		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		

		sum csuite if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc avg `referenceMean', replace 
	
		sum birthyear if e(sample)
		local referenceMean : di %4.0f r(mean) 
		estadd loc avgyear `referenceMean', replace 
		
	xi: logit csuite i.university_country_iso b_* q_j if sample_nonUS, vce(cluster school)
	eststo: margins, dydx(q_j) post vce(unconditional)
		estadd loc countryFE "$\checkmark$", replace 	 			

		sum q_j if e(sample)
		local referenceSD : di %3.2f r(sd) 
		estadd loc qjsd `referenceSD', replace 		

		sum csuite if e(sample)
		local referenceMean : di %4.3f r(mean) 
		estadd loc avg `referenceMean', replace 
		
		sum birthyear if e(sample)
		local referenceMean : di %4.0f r(mean) 
		estadd loc avgyear `referenceMean', replace 
		
	label var q_j "College graduate quality"
	
	* Store information for plotting
	local texFile = "`tablePath'/Table_6.tex"

	#delimit ; 
	esttab using "`texFile'", b(%6.3f) se(%5.3f) label compress replace obslast depvars nocons nomtitles
		legend star(* 0.10 ** 0.05 *** 0.01) eqlabels(none) nonum
		stats(countryFE avg qjsd N, 
						fmt(%-18s %4.3f %3.2f %9.0g) 
						labels("Country FE" 
							   "Mean outcome" 
							   "Std. dev. college graduate quality" 
							   "N")) 
		prehead(
			\begin{tabular}{l*{@M}{c}} \hline \hline \noalign{\smallskip}
					& \multicolumn{2}{c}{\shortstack{Share entrepreneurs}} 
					& \multicolumn{2}{c}{\shortstack{Share c-suite}} \\
						\cmidrule(l){2-3} \cmidrule(l){4-5} 
					& \multicolumn{1}{c}{\shortstack{U.S.\\colleges}} 
					& \multicolumn{1}{c}{\shortstack{Non-U.S.\\colleges}} 
					& \multicolumn{1}{c}{\shortstack{U.S.\\colleges}} 
					& \multicolumn{1}{c}{\shortstack{Non-U.S.\\colleges}} \\ \hline
		)
		posthead(\noalign{\smallskip})
		prefoot(\hline \noalign{\smallskip} )
		postfoot(
			\hline \hline \noalign{\smallskip}
			\end{tabular}
		)
	; 
	#delimit cr
	
restore

********************************************************
* ESTIMATE SELECTION 
********************************************************

* Keep only countries of study with at least threshold emigrants
reghdfe logbase_minus_z_c exp exp_sqrd if valid_educ & valid_school & universitycountry != "" , absorb(school yearofsalary)
generate currSample = e(sample) 

* Determine total emigrants and immigrants by country 
bys universitycountry : egen N_universityCountryEmigrants = sum(currSample * (universitycountry != countryname))
bys countryname : egen N_workCountryImmigrants = sum(currSample * (universitycountry != countryname))
generate validFlows = N_universityCountryEmigrants >= selection_thresh & N_workCountryImmigrants >= selection_thresh

* Restrict flows to countries with many emigrants and immigrants
reghdfe logbase_minus_z_c exp exp_sqrd if valid_educ & valid_school & validFlows & universitycountry != "" , absorb(school yearofsalary)
generate sampleSelection = e(sample) 

* Define main variables

generate h_ij = logbase_minus_z_c 

generate epsilon_i = h_ij - q_j

*-----------------------------
* Emigration
*-----------------------------

* Selection on q

bys universitycountry: egen avg_q_em = mean(q_j) if universitycountry != countryname & sampleSelection
bys universitycountry: egen q_em = max(avg_q_em) 

bys universitycountry: egen avg_q_nonem = mean(q_j) if universitycountry == countryname & sampleSelection
bys universitycountry: egen q_nonem = max(avg_q_nonem) 

generate sel_em_q = q_em - q_nonem

drop avg_q_em avg_q_nonem q_em q_nonem

* Selection on epsilon
	
bys universitycountry: egen avg_epsilon_em = mean(epsilon_i) if universitycountry != countryname & sampleSelection
bys universitycountry: egen epsilon_em = max(avg_epsilon_em) 

bys universitycountry: egen avg_epsilon_nonem = mean(epsilon_i) if universitycountry == countryname & sampleSelection
bys universitycountry: egen epsilon_nonem = max(avg_epsilon_nonem) 

generate sel_em_epsilon = epsilon_em - epsilon_nonem

drop avg_epsilon_em avg_epsilon_nonem epsilon_em epsilon_nonem

*-----------------------------
* Plot net emigration
*-----------------------------

generate sel_em = sel_em_q + sel_em_epsilon 

preserve

	keep if sampleSelection == 1
	
	bys universitycountry : keep if _n == 1
		
	twoway lfit sel_em log_gdppw_university_country  , clcolor(gs2)   || ///
			scatter sel_em log_gdppw_university_country , ///
			yline(0, lcolor(black) lwidth(0.1)) ///			
			legend(off) ms(i) mlabcolor(black) mlabel(university_country_iso) mlabsize(6.5pt)  xscale(range(9.375 12.075)) ///
		yscale(range(-.6 0.6)) ylabel(-0.6(0.2)0.6) ///
			xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000" ) ytitle("selection on human capital") title("")
	*graph export "`figurePath'/Figure_4a.pdf" , replace
	graph export "`figurePath'/Figure_5a.eps" , replace
	
restore

*-----------------------------
* Immigration
*-----------------------------

* Average q from origin countries

bys universitycountry: egen avg_q_b = mean(q_j) if sampleSelection
bys universitycountry: egen q_b = max(avg_q_b) 

* Average q in receiving countries

bys countryname: egen avg_q_c = mean(q_j) if universitycountry == countryname & sampleSelection
bys countryname: egen q_c = max(avg_q_c) 

* Selection on country

bys countryname: egen avg_country_im = mean(q_b) if universitycountry != countryname & sampleSelection
bys countryname: egen country_im = max(avg_country_im) 

generate sel_im_country = country_im - q_c

drop avg_country_im country_im

* Selection on college given country

bys countryname: egen avg_q_country_im = mean(q_j - q_b) if universitycountry != countryname & sampleSelection
bys countryname: egen q_country_im = max(avg_q_country_im) 

generate sel_im_q_country = q_country_im 

drop avg_q_country_im q_country_im

* Selection on epsilon
	
bys countryname: egen avg_epsilon_im = mean(epsilon_i) if universitycountry != countryname & sampleSelection
bys countryname: egen epsilon_im = max(avg_epsilon_im) 

bys countryname: egen avg_epsilon_nonim = mean(epsilon_i) if universitycountry == countryname & sampleSelection
bys countryname: egen epsilon_nonim = max(avg_epsilon_nonim) 

generate sel_im_epsilon = epsilon_im - epsilon_nonim

drop avg_epsilon_im avg_epsilon_nonim epsilon_im epsilon_nonim

* Reverse immigration selection to align with universitycountry

preserve

	keep if q_j != .
	
	bys countryname : keep if _n == 1

	keep countryname sel_im_q_country sel_im_country sel_im_epsilon
	
	rename countryname universitycountry
	rename sel_im_q_country inv_sel_im_q_country
	rename sel_im_country inv_sel_im_country
	rename sel_im_epsilon inv_sel_im_epsilon

	tempfile temp_invert
	save `temp_invert', emptyok 
	
restore

merge m:1 universitycountry using `temp_invert'
drop _merge

*-----------------------------
* Plot immigration decomposition
*-----------------------------

preserve

	keep if sampleSelection == 1
	
	bys universitycountry : keep if _n == 1
		
	twoway lfit inv_sel_im_country log_gdppw_university_country , clcolor(gs2)   || ///
			scatter inv_sel_im_country log_gdppw_university_country , ///
		legend(off) ms(i) mlabcolor(black) mlabel(university_country_iso) mlabsize(6.5pt) xscale(range(9.375 12.075)) ///
	yscale(range(-.6 0.6)) ylabel(-0.6(0.2)0.6) ///
		yline(0, lcolor(black) lwidth(0.1)) ///			
		xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000" ) ytitle("selection on country") title("")
	*graph export "`figurePath'/Figure_6a.pdf" , replace
	graph export "`figurePath'/Figure_7a.eps" , replace

	twoway lfit inv_sel_im_epsilon log_gdppw_university_country  , clcolor(gs2)   || ///
			scatter inv_sel_im_epsilon log_gdppw_university_country , ///
		legend(off) ms(i) mlabcolor(black) mlabel(university_country_iso) mlabsize(6.5pt)  xscale(range(9.375 12.075)) ///
	yscale(range(-.6 0.6)) ylabel(-0.6(0.2)0.6) ///
		yline(0, lcolor(black) lwidth(0.1)) ///			
		xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000" ) ytitle("selection on ability") title("")
	*graph export "`figurePath'/Figure_6b.pdf" , replace
	graph export "`figurePath'/Figure_7b.eps" , replace

restore

*-----------------------------
* Plot net immigration
*-----------------------------

generate sel_im = inv_sel_im_country + inv_sel_im_epsilon + inv_sel_im_q_country

preserve

	keep if sampleSelection == 1
	
	bys universitycountry : keep if _n == 1
		
	twoway lfit sel_im log_gdppw_university_country, clcolor(gs2)   || ///
			scatter sel_im log_gdppw_university_country, ///
		yline(0, lcolor(black) lwidth(0.1)) ///			
		legend(off) ms(i) mlabcolor(black) mlabel(university_country_iso) mlabsize(6.5pt) xscale(range(9.375 12.075)) ///
	yscale(range(-.6 0.6)) ylabel(-0.6(0.2)0.6) ///
		xtitle("GDP per worker, 2019 intl $") xlabel(9.68 "16000" 10.373 "32000" 11.067 "64000" 11.760 "128000" ) ytitle("selection on human capital") 
	*graph export "`figurePath'/Figure_4b.pdf" , replace
	graph export "`figurePath'/Figure_5b.eps" , replace
	
restore

*-----------------------------
* Read in share of graduates who immigrate/emigrate 
*-----------------------------

preserve

	clear
	
	insheet using "`inputPath'/raw_flows.csv" , comma
		
	drop country_glassdoor
	rename iso university_country_iso
	
	rename imm share_immigrate
	rename emig share_emigrate
	rename net share_net
	
	tempfile temp_immigrants
	save `temp_immigrants', emptyok 
	
restore

merge m:1 university_country_iso using `temp_immigrants'
drop _merge

*-----------------------------
* Calculate emigration/immigration adjusting for human capital 
*-----------------------------

generate pct_emigrate = 100 * share_emigrate
generate pct_emigrate_qadj = 100 * share_emigrate * exp(sel_em)

generate pct_immigrate = 100 * share_immigrate
generate pct_immigrate_qadj = 100 * share_immigrate * exp(sel_im)

generate pct_net = pct_immigrate - pct_emigrate
generate pct_net_qadj = pct_immigrate_qadj - pct_emigrate_qadj

graph bar pct_net pct_net_qadj if pct_net_qadj != . , over(university_country_iso, sort(pct_net) label(angle(90))) scale(*.75) bar(1, fcolor(gs12) lstyle(none)) bar(2, fcolor(gs4) lstyle(none)) legend(label(1 "College graduates") label(2 "Quality-adjusted college-graduate equivalents")) legend(position(0) bplacement(nwest) row(2))  ytitle("net migration (percent)") yscale(range(-25 40)) ylabel(-25(5)40) yline(0, lcolor(black) lwidth(0.1)) 
*graph export "`figurePath'/Figure_5.pdf" , replace
graph export "`figurePath'/Figure_6.eps" , replace

*-----------------------------
* Export adjustment margin results
*-----------------------------

preserve

	keep if q_j != .
	
	keep if sel_em != . 
	
	bys universitycountry : keep if _n == 1
	
	generate uc_gdppw = exp(log_gdppw_university_country)
		
	keep universitycountry uc_gdppw sel_em_q sel_em_epsilon inv_sel_im_q_country inv_sel_im_country inv_sel_im_epsilon pct_emigrate pct_emigrate_qadj pct_immigrate pct_immigrate_qadj pct_net pct_net_qadj
	
	order universitycountry uc_gdppw sel_em_q sel_em_epsilon inv_sel_im_q_country inv_sel_im_country inv_sel_im_epsilon pct_emigrate pct_emigrate_qadj pct_immigrate pct_immigrate_qadj pct_net pct_net_qadj
	
	sort uc_gdppw
	
	outsheet using "`estimatePath'\Emigration_and_immigration.csv" , comma replace

restore

*-----------------------------
* Export migrant flow results
*-----------------------------

preserve

	keep if q_j != .
	
	bys universitycountry : keep if _n == 1

	keep if sel_em != . & pct_emigrate != .
	
	generate uc_gdppw = exp(log_gdppw_university_country)
		
	keep universitycountry uc_gdppw pct_emigrate pct_emigrate_qadj pct_immigrate pct_immigrate_qadj pct_net pct_net_qadj
	
	order universitycountry uc_gdppw pct_emigrate pct_emigrate_qadj pct_immigrate pct_immigrate_qadj pct_net pct_net_qadj
	
	sort uc_gdppw

	outsheet using "`estimatePath'\Migrant_flows.csv" , comma replace

restore

********************************************************
* Look at decomposition adding certain fixed effects
********************************************************
	
	eststo clear

	tab grpmajor , gen(m_)
	
	* Adding major
	reghdfe logbase_minus_z_c exp exp_sqrd m_* if valid_educ & valid_school & home_country & universitycountry != "" , absorb(fe_school=school yearofsalary)

	bys school : egen q_j_major = max(fe_school)
	drop fe_school

	* Variance of q_j

	bys school : gen newSchool = _n == 1
	
	summarize q_j if newSchool
	scalar var_q_j = r(sd) ^ 2

	foreach my_var of varlist q_j_major {

		summarize `my_var' if newSchool
		scalar var_`my_var' = r(sd) ^ 2

	}
